limernyou commited on
Commit
d309c05
·
verified ·
1 Parent(s): 313a490

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +288 -0
  2. checkpoint-100/optimizer_0/.metadata +3 -0
  3. checkpoint-100/optimizer_0/__0_0.distcp +3 -0
  4. checkpoint-100/optimizer_0/__1_0.distcp +3 -0
  5. checkpoint-100/optimizer_0/__2_0.distcp +3 -0
  6. checkpoint-100/optimizer_0/__3_0.distcp +3 -0
  7. checkpoint-100/optimizer_0/__4_0.distcp +3 -0
  8. checkpoint-100/optimizer_0/__5_0.distcp +3 -0
  9. checkpoint-100/optimizer_0/__6_0.distcp +3 -0
  10. checkpoint-100/optimizer_0/__7_0.distcp +3 -0
  11. checkpoint-100/pytorch_model_fsdp_0/.metadata +3 -0
  12. checkpoint-100/pytorch_model_fsdp_0/__0_0.distcp +3 -0
  13. checkpoint-100/pytorch_model_fsdp_0/__1_0.distcp +3 -0
  14. checkpoint-100/pytorch_model_fsdp_0/__2_0.distcp +3 -0
  15. checkpoint-100/pytorch_model_fsdp_0/__3_0.distcp +3 -0
  16. checkpoint-100/pytorch_model_fsdp_0/__4_0.distcp +3 -0
  17. checkpoint-100/pytorch_model_fsdp_0/__5_0.distcp +3 -0
  18. checkpoint-100/pytorch_model_fsdp_0/__6_0.distcp +3 -0
  19. checkpoint-100/pytorch_model_fsdp_0/__7_0.distcp +3 -0
  20. checkpoint-100/rng_state_0.pth +3 -0
  21. checkpoint-100/rng_state_1.pth +3 -0
  22. checkpoint-100/rng_state_2.pth +3 -0
  23. checkpoint-100/rng_state_3.pth +3 -0
  24. checkpoint-100/rng_state_4.pth +3 -0
  25. checkpoint-100/rng_state_5.pth +3 -0
  26. checkpoint-100/rng_state_6.pth +3 -0
  27. checkpoint-100/rng_state_7.pth +3 -0
  28. checkpoint-100/scheduler.pt +3 -0
  29. checkpoint-100/trainer_state.json +70 -0
  30. checkpoint-1000/optimizer_0/.metadata +3 -0
  31. checkpoint-1000/optimizer_0/__0_0.distcp +3 -0
  32. checkpoint-1000/optimizer_0/__1_0.distcp +3 -0
  33. checkpoint-1000/optimizer_0/__2_0.distcp +3 -0
  34. checkpoint-1000/optimizer_0/__3_0.distcp +3 -0
  35. checkpoint-1000/optimizer_0/__4_0.distcp +3 -0
  36. checkpoint-1000/optimizer_0/__5_0.distcp +3 -0
  37. checkpoint-1000/optimizer_0/__6_0.distcp +3 -0
  38. checkpoint-1000/optimizer_0/__7_0.distcp +3 -0
  39. checkpoint-1000/pytorch_model_fsdp_0/.metadata +3 -0
  40. checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp +3 -0
  41. checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp +3 -0
  42. checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp +3 -0
  43. checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp +3 -0
  44. checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp +3 -0
  45. checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp +3 -0
  46. checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp +3 -0
  47. checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp +3 -0
  48. checkpoint-1000/rng_state_0.pth +3 -0
  49. checkpoint-1000/rng_state_1.pth +3 -0
  50. checkpoint-1000/rng_state_2.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,291 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-100/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-100/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-100/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-100/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
40
+ checkpoint-100/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
41
+ checkpoint-100/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
42
+ checkpoint-100/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
43
+ checkpoint-100/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
44
+ checkpoint-100/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
45
+ checkpoint-100/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
46
+ checkpoint-100/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
47
+ checkpoint-100/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
48
+ checkpoint-100/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
49
+ checkpoint-100/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
50
+ checkpoint-100/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
51
+ checkpoint-100/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
52
+ checkpoint-100/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
53
+ checkpoint-100/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
54
+ checkpoint-1000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
55
+ checkpoint-1000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
56
+ checkpoint-1000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
57
+ checkpoint-1000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
58
+ checkpoint-1000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
59
+ checkpoint-1000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
60
+ checkpoint-1000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
61
+ checkpoint-1000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
62
+ checkpoint-1000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
63
+ checkpoint-1000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
64
+ checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
65
+ checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
66
+ checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
67
+ checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
68
+ checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
69
+ checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
70
+ checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
71
+ checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
72
+ checkpoint-1100/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
73
+ checkpoint-1100/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
74
+ checkpoint-1100/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
75
+ checkpoint-1100/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
76
+ checkpoint-1100/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
77
+ checkpoint-1100/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
78
+ checkpoint-1100/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
79
+ checkpoint-1100/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
80
+ checkpoint-1100/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
81
+ checkpoint-1100/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
82
+ checkpoint-1100/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
83
+ checkpoint-1100/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
84
+ checkpoint-1100/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
85
+ checkpoint-1100/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
86
+ checkpoint-1100/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
87
+ checkpoint-1100/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
88
+ checkpoint-1100/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
89
+ checkpoint-1100/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
90
+ checkpoint-1200/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
91
+ checkpoint-1200/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
92
+ checkpoint-1200/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
93
+ checkpoint-1200/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
94
+ checkpoint-1200/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
95
+ checkpoint-1200/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
96
+ checkpoint-1200/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
97
+ checkpoint-1200/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
98
+ checkpoint-1200/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
99
+ checkpoint-1200/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
100
+ checkpoint-1200/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
101
+ checkpoint-1200/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
102
+ checkpoint-1200/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
103
+ checkpoint-1200/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
104
+ checkpoint-1200/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
105
+ checkpoint-1200/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
106
+ checkpoint-1200/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
107
+ checkpoint-1200/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
108
+ checkpoint-1300/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
109
+ checkpoint-1300/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
110
+ checkpoint-1300/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
111
+ checkpoint-1300/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
112
+ checkpoint-1300/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
113
+ checkpoint-1300/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
114
+ checkpoint-1300/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
115
+ checkpoint-1300/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
116
+ checkpoint-1300/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
117
+ checkpoint-1300/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
118
+ checkpoint-1300/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
119
+ checkpoint-1300/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
120
+ checkpoint-1300/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
121
+ checkpoint-1300/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
122
+ checkpoint-1300/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
123
+ checkpoint-1300/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
124
+ checkpoint-1300/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
125
+ checkpoint-1300/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
126
+ checkpoint-1400/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
127
+ checkpoint-1400/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
128
+ checkpoint-1400/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
129
+ checkpoint-1400/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
130
+ checkpoint-1400/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
131
+ checkpoint-1400/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
132
+ checkpoint-1400/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
133
+ checkpoint-1400/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
134
+ checkpoint-1400/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
135
+ checkpoint-1400/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
136
+ checkpoint-1400/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
137
+ checkpoint-1400/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
138
+ checkpoint-1400/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
139
+ checkpoint-1400/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
140
+ checkpoint-1400/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
141
+ checkpoint-1400/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
142
+ checkpoint-1400/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
143
+ checkpoint-1400/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
144
+ checkpoint-1500/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
145
+ checkpoint-1500/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
146
+ checkpoint-1500/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
147
+ checkpoint-1500/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
148
+ checkpoint-1500/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
149
+ checkpoint-1500/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
150
+ checkpoint-1500/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
151
+ checkpoint-1500/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
152
+ checkpoint-1500/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
153
+ checkpoint-1500/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
154
+ checkpoint-1500/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
155
+ checkpoint-1500/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
156
+ checkpoint-1500/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
157
+ checkpoint-1500/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
158
+ checkpoint-1500/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
159
+ checkpoint-1500/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
160
+ checkpoint-1500/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
161
+ checkpoint-1500/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
162
+ checkpoint-1600/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
163
+ checkpoint-1600/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
164
+ checkpoint-1600/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
165
+ checkpoint-1600/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
166
+ checkpoint-1600/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
167
+ checkpoint-1600/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
168
+ checkpoint-1600/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
169
+ checkpoint-1600/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
170
+ checkpoint-1600/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
171
+ checkpoint-1600/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
172
+ checkpoint-1600/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
173
+ checkpoint-1600/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
174
+ checkpoint-1600/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
175
+ checkpoint-1600/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
176
+ checkpoint-1600/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
177
+ checkpoint-1600/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
178
+ checkpoint-1600/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
179
+ checkpoint-1600/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
180
+ checkpoint-200/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
181
+ checkpoint-200/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
182
+ checkpoint-200/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
183
+ checkpoint-200/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
184
+ checkpoint-200/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
185
+ checkpoint-200/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
186
+ checkpoint-200/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
187
+ checkpoint-200/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
188
+ checkpoint-200/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
189
+ checkpoint-200/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
190
+ checkpoint-200/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
191
+ checkpoint-200/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
192
+ checkpoint-200/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
193
+ checkpoint-200/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
194
+ checkpoint-200/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
195
+ checkpoint-200/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
196
+ checkpoint-200/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
197
+ checkpoint-200/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
198
+ checkpoint-300/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
199
+ checkpoint-300/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
200
+ checkpoint-300/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
201
+ checkpoint-300/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
202
+ checkpoint-300/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
203
+ checkpoint-300/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
204
+ checkpoint-300/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
205
+ checkpoint-300/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
206
+ checkpoint-300/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
207
+ checkpoint-300/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
208
+ checkpoint-300/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
209
+ checkpoint-300/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
210
+ checkpoint-300/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
211
+ checkpoint-300/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
212
+ checkpoint-300/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
213
+ checkpoint-300/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
214
+ checkpoint-300/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
215
+ checkpoint-300/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
216
+ checkpoint-400/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
217
+ checkpoint-400/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
218
+ checkpoint-400/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
219
+ checkpoint-400/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
220
+ checkpoint-400/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
221
+ checkpoint-400/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
222
+ checkpoint-400/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
223
+ checkpoint-400/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
224
+ checkpoint-400/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
225
+ checkpoint-400/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
226
+ checkpoint-400/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
227
+ checkpoint-400/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
228
+ checkpoint-400/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
229
+ checkpoint-400/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
230
+ checkpoint-400/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
231
+ checkpoint-400/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
232
+ checkpoint-400/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
233
+ checkpoint-400/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
234
+ checkpoint-500/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
235
+ checkpoint-500/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
236
+ checkpoint-500/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
237
+ checkpoint-500/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
238
+ checkpoint-500/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
239
+ checkpoint-500/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
240
+ checkpoint-500/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
241
+ checkpoint-500/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
242
+ checkpoint-500/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
243
+ checkpoint-500/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
244
+ checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
245
+ checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
246
+ checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
247
+ checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
248
+ checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
249
+ checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
250
+ checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
251
+ checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
252
+ checkpoint-600/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
253
+ checkpoint-600/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
254
+ checkpoint-600/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
255
+ checkpoint-600/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
256
+ checkpoint-600/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
257
+ checkpoint-600/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
258
+ checkpoint-600/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
259
+ checkpoint-600/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
260
+ checkpoint-600/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
261
+ checkpoint-600/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
262
+ checkpoint-600/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
263
+ checkpoint-600/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
264
+ checkpoint-600/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
265
+ checkpoint-600/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
266
+ checkpoint-600/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
267
+ checkpoint-600/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
268
+ checkpoint-600/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
269
+ checkpoint-600/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
270
+ checkpoint-700/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
271
+ checkpoint-700/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
272
+ checkpoint-700/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
273
+ checkpoint-700/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
274
+ checkpoint-700/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
275
+ checkpoint-700/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
276
+ checkpoint-700/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
277
+ checkpoint-700/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
278
+ checkpoint-700/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
279
+ checkpoint-700/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
280
+ checkpoint-700/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
281
+ checkpoint-700/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
282
+ checkpoint-700/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
283
+ checkpoint-700/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
284
+ checkpoint-700/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
285
+ checkpoint-700/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
286
+ checkpoint-700/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
287
+ checkpoint-700/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
288
+ checkpoint-800/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
289
+ checkpoint-800/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
290
+ checkpoint-800/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
291
+ checkpoint-800/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
292
+ checkpoint-800/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
293
+ checkpoint-800/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
294
+ checkpoint-800/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
295
+ checkpoint-800/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
296
+ checkpoint-800/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
297
+ checkpoint-800/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
298
+ checkpoint-800/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
299
+ checkpoint-800/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
300
+ checkpoint-800/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
301
+ checkpoint-800/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
302
+ checkpoint-800/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
303
+ checkpoint-800/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
304
+ checkpoint-800/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
305
+ checkpoint-800/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
306
+ checkpoint-900/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
307
+ checkpoint-900/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
308
+ checkpoint-900/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
309
+ checkpoint-900/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
310
+ checkpoint-900/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
311
+ checkpoint-900/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
312
+ checkpoint-900/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
313
+ checkpoint-900/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
314
+ checkpoint-900/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
315
+ checkpoint-900/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
316
+ checkpoint-900/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
317
+ checkpoint-900/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
318
+ checkpoint-900/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
319
+ checkpoint-900/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
320
+ checkpoint-900/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
321
+ checkpoint-900/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
322
+ checkpoint-900/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
323
+ checkpoint-900/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
checkpoint-100/optimizer_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0edc8d2dfeb47a02ebf14c13ae8e8bce201146e0bc5a503c06dc5c3badd9be10
3
+ size 438489
checkpoint-100/optimizer_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d1c28d3f689644a47e74d4f1ee5b7dc5cc044565b1315bff8e8a2aeefe9f413
3
+ size 2980252
checkpoint-100/optimizer_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:030628a66629a76363540bb2818b91de7f253b19406440a21717419a62ab3717
3
+ size 2997320
checkpoint-100/optimizer_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a9391201a9da96a4fdbab7b6a4cf47168091bd0dd1c44867765ccc1d3e73476
3
+ size 2997320
checkpoint-100/optimizer_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9fc15f10515d2e4ee6b71f505f05d1a7bea77c1dba9499fe4eb2072451c7d6d
3
+ size 2997320
checkpoint-100/optimizer_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d4916eaf545376ba9d5f9f57f76790687595cb6801bf059b0993682353a07e1
3
+ size 2997320
checkpoint-100/optimizer_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f939ef56e2f24636e7e6a98bb1c60ab4d6b639f6c7ff2d28b3829debe688ed93
3
+ size 2999596
checkpoint-100/optimizer_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3865669be6d30c5583dd026a7ecba0ea760ae9771b16578f04bef63370b91163
3
+ size 2998732
checkpoint-100/optimizer_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fd2f18ecfb8649864f2037130c72ee56e478582b36b01305fe71bb7afc22e14
3
+ size 3005708
checkpoint-100/pytorch_model_fsdp_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68cfc031ba4ee5e17d54df02e4118bf33d20b307af59f234ca2e2343fa219903
3
+ size 170758
checkpoint-100/pytorch_model_fsdp_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1c031aaa4d9850d16cd4fd5b0d84e9577b3d28cc0ee02f5d3e5b600380a7ad8
3
+ size 1489536
checkpoint-100/pytorch_model_fsdp_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ec61051deaf3fa23eb94f1031f932e14bdce8cd6bcdc1fca81330b5d598e77d
3
+ size 1489536
checkpoint-100/pytorch_model_fsdp_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30d8685570a1f594d1e21439e41cebd424ca6492cf61e263820d15e7a0745ddc
3
+ size 1489536
checkpoint-100/pytorch_model_fsdp_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ca77920177e57b80c4563dad6c5d896feb2765409bd5c898525431bba4553e4
3
+ size 1489536
checkpoint-100/pytorch_model_fsdp_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:938b895764f35aa99aa6c7aff0eae60410fc3881558def1bf6ddfd6a293e6219
3
+ size 1489536
checkpoint-100/pytorch_model_fsdp_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1fb9b9484c9ce197cb68b5700a7090a302cef24375116457588188830d5aff1
3
+ size 1489536
checkpoint-100/pytorch_model_fsdp_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52a2ed377073d705368c5982737897b617055ea31e7890f447142c68c57ad89b
3
+ size 1489536
checkpoint-100/pytorch_model_fsdp_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2f02548ea862d75fd09dcf614ff8b5a6c75347e7603902808fffc6f60f1fc8d
3
+ size 1489536
checkpoint-100/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e67ac3c4038beb665d2cc4bd735b6f05977897a2757187e8c7c8e6b89fa4ad3d
3
+ size 15920
checkpoint-100/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:219fc23b677e769ae4d4806c12e3df4ff2b78c28b311847bec2ecb99a35a51e6
3
+ size 15984
checkpoint-100/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a352b51cb68b5d8818a7a28f74f32dffb1095d7e281ff4022f0e365fe98a8ee0
3
+ size 15984
checkpoint-100/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df9e8f9785d0d17d62f1ba8a141384aa7a6438a53ecad5f21d877594c31b45d0
3
+ size 15984
checkpoint-100/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a770deea02d60eea9348a15bd08ac4f95c99d6b5b113eb31bc2fa7631dba1988
3
+ size 15984
checkpoint-100/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b60fb34596e1922ede5c5a2479b5b98033b4b988dc300df633c12f5e255755
3
+ size 15984
checkpoint-100/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84274411027a8eb72ed1179f8209a875b6f4101ac7c2790eaef04102df49af52
3
+ size 15984
checkpoint-100/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbc89ce90c5e7e9f362eb48f3be0a6f39aee82e598c876d2d126ef971bbbfdc
3
+ size 15984
checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b84ab1237abc7bd4d31945126355c5b6d9e26cb338d88dae9fd60030b2e1fb3
3
+ size 1064
checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.05,
6
+ "eval_steps": 100,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0125,
14
+ "grad_norm": 0.08136817067861557,
15
+ "learning_rate": 0.0004,
16
+ "loss": 1.158,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.025,
21
+ "grad_norm": 0.0653829574584961,
22
+ "learning_rate": 0.0004998852503731983,
23
+ "loss": 1.0957,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.0375,
28
+ "grad_norm": 0.11592712253332138,
29
+ "learning_rate": 0.0004993848168027977,
30
+ "loss": 0.9276,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 0.05,
35
+ "grad_norm": 0.08926476538181305,
36
+ "learning_rate": 0.0004984880506341147,
37
+ "loss": 1.0337,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.05,
42
+ "eval_loss": 0.9111642837524414,
43
+ "eval_runtime": 843.8058,
44
+ "eval_samples_per_second": 1.3,
45
+ "eval_steps_per_second": 0.021,
46
+ "step": 100
47
+ }
48
+ ],
49
+ "logging_steps": 25,
50
+ "max_steps": 2000,
51
+ "num_input_tokens_seen": 0,
52
+ "num_train_epochs": 9223372036854775807,
53
+ "save_steps": 100,
54
+ "stateful_callbacks": {
55
+ "TrainerControl": {
56
+ "args": {
57
+ "should_epoch_stop": false,
58
+ "should_evaluate": false,
59
+ "should_log": false,
60
+ "should_save": true,
61
+ "should_training_stop": false
62
+ },
63
+ "attributes": {}
64
+ }
65
+ },
66
+ "total_flos": 1.0051773955833856e+16,
67
+ "train_batch_size": 8,
68
+ "trial_name": null,
69
+ "trial_params": null
70
+ }
checkpoint-1000/optimizer_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5adc2f86584578d5b0fc9d27ea5dbfbb771072ee212717f7238e9996fd706b9
3
+ size 438490
checkpoint-1000/optimizer_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6d7394a57419b8165633d1023718e8463d05d7c238b55a2e7d2678f252c6749
3
+ size 2980252
checkpoint-1000/optimizer_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98819476dcc0947c7b0953b16a3a43dbb12f43c67dcd556c063b49171d1c069f
3
+ size 2997320
checkpoint-1000/optimizer_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7a1bb1ba5070ed8f6dfb63045e29b095ccdef856f78679e7b19b4ce0087b9f8
3
+ size 2997320
checkpoint-1000/optimizer_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db8406b0d798fb108ace791356028126de7ad08f6b4eb0739c7cc8e9701cdf6
3
+ size 2997320
checkpoint-1000/optimizer_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63686f6bd6c50897e03d760b4f72958355d7496fb960c9f340850296f38d35b2
3
+ size 2997320
checkpoint-1000/optimizer_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6799641756030e3574650b44f2547e20e7d31b645cc25858a4411e1be2910925
3
+ size 2999596
checkpoint-1000/optimizer_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bdb585bfac57da8fec672146a2806ef304b10a4f6825ba24dfa84caaa05d598
3
+ size 2998732
checkpoint-1000/optimizer_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecf6e24578cc6ea92838e932b80d8dbddc7485f60cf5abf3cf041f8ea8982ba
3
+ size 3005708
checkpoint-1000/pytorch_model_fsdp_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22dae21ec1ab3c79cfa84c69284e9d1d5c53be899d859a084832b641e53c0c58
3
+ size 170759
checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45fd5c265162921ffc10ba501bcf7301af17e682d1e7782bd851d7123a9ff52f
3
+ size 1489536
checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc340604862635e9a74d8a8ad2ab86843cae36b1ee0346e5ab344793aaa415a3
3
+ size 1489536
checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4e5135c37126c1dd364cda7c9a85dc035ff01e694ed0826916f50cfb8cbafe9
3
+ size 1489536
checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d429e58b5d0c3e5b18959b3586a288702734cc26e8c70a9bbf05d6e3db8c88cb
3
+ size 1489536
checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d953897b18811b23f76e97d2db97a0bf63f3d733026b90325e1fa1e1b4140816
3
+ size 1489536
checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b52f1b67d2571358eb10fed0ea265b7ba227f5d1ae555bbdd77309941eb6a82f
3
+ size 1489536
checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad5f2b67364597e2fe0e020f7a9251138cf95238cd02209fa386fedce5c7d894
3
+ size 1489536
checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:605e2ea118ae0cece4191888ba5264eb3241fcb31217526ef80d208ed4aa85be
3
+ size 1489536
checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cce6fd095e8164b6174af806d5b65f1592b912a16965a6ac33d77e523c8ae2a
3
+ size 15920
checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c89ca7da8c752e198c07a80618c28fafea39abe5f5e38d625a1d96b586893f6e
3
+ size 15984
checkpoint-1000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a64aa0a7bd0e9443e2c11a9e1b32b905f251349e940dd3776471dd51dc9441
3
+ size 15984