Yilun-Kong committed on
Commit
adfc48f
·
verified ·
1 Parent(s): e6e23f3

Upload 175 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +175 -0
  2. backbone/prompt_model_mt160_5M_iter_400000 +3 -0
  3. gradient_16experts+moe/expert_0_iter_200000 +3 -0
  4. gradient_16experts+moe/expert_10_iter_200000 +3 -0
  5. gradient_16experts+moe/expert_11_iter_200000 +3 -0
  6. gradient_16experts+moe/expert_12_iter_200000 +3 -0
  7. gradient_16experts+moe/expert_13_iter_200000 +3 -0
  8. gradient_16experts+moe/expert_14_iter_200000 +3 -0
  9. gradient_16experts+moe/expert_15_iter_200000 +3 -0
  10. gradient_16experts+moe/expert_1_iter_200000 +3 -0
  11. gradient_16experts+moe/expert_2_iter_200000 +3 -0
  12. gradient_16experts+moe/expert_3_iter_200000 +3 -0
  13. gradient_16experts+moe/expert_4_iter_200000 +3 -0
  14. gradient_16experts+moe/expert_5_iter_200000 +3 -0
  15. gradient_16experts+moe/expert_6_iter_200000 +3 -0
  16. gradient_16experts+moe/expert_7_iter_200000 +3 -0
  17. gradient_16experts+moe/expert_8_iter_200000 +3 -0
  18. gradient_16experts+moe/expert_9_iter_200000 +3 -0
  19. gradient_16experts+moe/moe__iter_400000 +3 -0
  20. gradient_24experts+moe/expert_0_iter_200000 +3 -0
  21. gradient_24experts+moe/expert_10_iter_200000 +3 -0
  22. gradient_24experts+moe/expert_11_iter_200000 +3 -0
  23. gradient_24experts+moe/expert_12_iter_200000 +3 -0
  24. gradient_24experts+moe/expert_13_iter_200000 +3 -0
  25. gradient_24experts+moe/expert_14_iter_200000 +3 -0
  26. gradient_24experts+moe/expert_15_iter_200000 +3 -0
  27. gradient_24experts+moe/expert_16_iter_200000 +3 -0
  28. gradient_24experts+moe/expert_17_iter_200000 +3 -0
  29. gradient_24experts+moe/expert_18_iter_200000 +3 -0
  30. gradient_24experts+moe/expert_19_iter_200000 +3 -0
  31. gradient_24experts+moe/expert_1_iter_200000 +3 -0
  32. gradient_24experts+moe/expert_20_iter_200000 +3 -0
  33. gradient_24experts+moe/expert_21_iter_200000 +3 -0
  34. gradient_24experts+moe/expert_22_iter_200000 +3 -0
  35. gradient_24experts+moe/expert_23_iter_200000 +3 -0
  36. gradient_24experts+moe/expert_2_iter_200000 +3 -0
  37. gradient_24experts+moe/expert_3_iter_200000 +3 -0
  38. gradient_24experts+moe/expert_4_iter_200000 +3 -0
  39. gradient_24experts+moe/expert_5_iter_200000 +3 -0
  40. gradient_24experts+moe/expert_6_iter_200000 +3 -0
  41. gradient_24experts+moe/expert_7_iter_200000 +3 -0
  42. gradient_24experts+moe/expert_8_iter_200000 +3 -0
  43. gradient_24experts+moe/expert_9_iter_200000 +3 -0
  44. gradient_24experts+moe/moe__iter_400000 +3 -0
  45. gradient_32experts+moe/expert_0_iter_200000 +3 -0
  46. gradient_32experts+moe/expert_10_iter_200000 +3 -0
  47. gradient_32experts+moe/expert_11_iter_200000 +3 -0
  48. gradient_32experts+moe/expert_12_iter_200000 +3 -0
  49. gradient_32experts+moe/expert_13_iter_200000 +3 -0
  50. gradient_32experts+moe/expert_14_iter_200000 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,178 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ backbone/prompt_model_mt160_5M_iter_400000 filter=lfs diff=lfs merge=lfs -text
37
+ gradient_16experts+moe/expert_0_iter_200000 filter=lfs diff=lfs merge=lfs -text
38
+ gradient_16experts+moe/expert_1_iter_200000 filter=lfs diff=lfs merge=lfs -text
39
+ gradient_16experts+moe/expert_10_iter_200000 filter=lfs diff=lfs merge=lfs -text
40
+ gradient_16experts+moe/expert_11_iter_200000 filter=lfs diff=lfs merge=lfs -text
41
+ gradient_16experts+moe/expert_12_iter_200000 filter=lfs diff=lfs merge=lfs -text
42
+ gradient_16experts+moe/expert_13_iter_200000 filter=lfs diff=lfs merge=lfs -text
43
+ gradient_16experts+moe/expert_14_iter_200000 filter=lfs diff=lfs merge=lfs -text
44
+ gradient_16experts+moe/expert_15_iter_200000 filter=lfs diff=lfs merge=lfs -text
45
+ gradient_16experts+moe/expert_2_iter_200000 filter=lfs diff=lfs merge=lfs -text
46
+ gradient_16experts+moe/expert_3_iter_200000 filter=lfs diff=lfs merge=lfs -text
47
+ gradient_16experts+moe/expert_4_iter_200000 filter=lfs diff=lfs merge=lfs -text
48
+ gradient_16experts+moe/expert_5_iter_200000 filter=lfs diff=lfs merge=lfs -text
49
+ gradient_16experts+moe/expert_6_iter_200000 filter=lfs diff=lfs merge=lfs -text
50
+ gradient_16experts+moe/expert_7_iter_200000 filter=lfs diff=lfs merge=lfs -text
51
+ gradient_16experts+moe/expert_8_iter_200000 filter=lfs diff=lfs merge=lfs -text
52
+ gradient_16experts+moe/expert_9_iter_200000 filter=lfs diff=lfs merge=lfs -text
53
+ gradient_16experts+moe/moe__iter_400000 filter=lfs diff=lfs merge=lfs -text
54
+ gradient_24experts+moe/expert_0_iter_200000 filter=lfs diff=lfs merge=lfs -text
55
+ gradient_24experts+moe/expert_1_iter_200000 filter=lfs diff=lfs merge=lfs -text
56
+ gradient_24experts+moe/expert_10_iter_200000 filter=lfs diff=lfs merge=lfs -text
57
+ gradient_24experts+moe/expert_11_iter_200000 filter=lfs diff=lfs merge=lfs -text
58
+ gradient_24experts+moe/expert_12_iter_200000 filter=lfs diff=lfs merge=lfs -text
59
+ gradient_24experts+moe/expert_13_iter_200000 filter=lfs diff=lfs merge=lfs -text
60
+ gradient_24experts+moe/expert_14_iter_200000 filter=lfs diff=lfs merge=lfs -text
61
+ gradient_24experts+moe/expert_15_iter_200000 filter=lfs diff=lfs merge=lfs -text
62
+ gradient_24experts+moe/expert_16_iter_200000 filter=lfs diff=lfs merge=lfs -text
63
+ gradient_24experts+moe/expert_17_iter_200000 filter=lfs diff=lfs merge=lfs -text
64
+ gradient_24experts+moe/expert_18_iter_200000 filter=lfs diff=lfs merge=lfs -text
65
+ gradient_24experts+moe/expert_19_iter_200000 filter=lfs diff=lfs merge=lfs -text
66
+ gradient_24experts+moe/expert_2_iter_200000 filter=lfs diff=lfs merge=lfs -text
67
+ gradient_24experts+moe/expert_20_iter_200000 filter=lfs diff=lfs merge=lfs -text
68
+ gradient_24experts+moe/expert_21_iter_200000 filter=lfs diff=lfs merge=lfs -text
69
+ gradient_24experts+moe/expert_22_iter_200000 filter=lfs diff=lfs merge=lfs -text
70
+ gradient_24experts+moe/expert_23_iter_200000 filter=lfs diff=lfs merge=lfs -text
71
+ gradient_24experts+moe/expert_3_iter_200000 filter=lfs diff=lfs merge=lfs -text
72
+ gradient_24experts+moe/expert_4_iter_200000 filter=lfs diff=lfs merge=lfs -text
73
+ gradient_24experts+moe/expert_5_iter_200000 filter=lfs diff=lfs merge=lfs -text
74
+ gradient_24experts+moe/expert_6_iter_200000 filter=lfs diff=lfs merge=lfs -text
75
+ gradient_24experts+moe/expert_7_iter_200000 filter=lfs diff=lfs merge=lfs -text
76
+ gradient_24experts+moe/expert_8_iter_200000 filter=lfs diff=lfs merge=lfs -text
77
+ gradient_24experts+moe/expert_9_iter_200000 filter=lfs diff=lfs merge=lfs -text
78
+ gradient_24experts+moe/moe__iter_400000 filter=lfs diff=lfs merge=lfs -text
79
+ gradient_32experts+moe/expert_0_iter_200000 filter=lfs diff=lfs merge=lfs -text
80
+ gradient_32experts+moe/expert_1_iter_200000 filter=lfs diff=lfs merge=lfs -text
81
+ gradient_32experts+moe/expert_10_iter_200000 filter=lfs diff=lfs merge=lfs -text
82
+ gradient_32experts+moe/expert_11_iter_200000 filter=lfs diff=lfs merge=lfs -text
83
+ gradient_32experts+moe/expert_12_iter_200000 filter=lfs diff=lfs merge=lfs -text
84
+ gradient_32experts+moe/expert_13_iter_200000 filter=lfs diff=lfs merge=lfs -text
85
+ gradient_32experts+moe/expert_14_iter_200000 filter=lfs diff=lfs merge=lfs -text
86
+ gradient_32experts+moe/expert_15_iter_200000 filter=lfs diff=lfs merge=lfs -text
87
+ gradient_32experts+moe/expert_16_iter_200000 filter=lfs diff=lfs merge=lfs -text
88
+ gradient_32experts+moe/expert_17_iter_200000 filter=lfs diff=lfs merge=lfs -text
89
+ gradient_32experts+moe/expert_18_iter_200000 filter=lfs diff=lfs merge=lfs -text
90
+ gradient_32experts+moe/expert_19_iter_200000 filter=lfs diff=lfs merge=lfs -text
91
+ gradient_32experts+moe/expert_2_iter_200000 filter=lfs diff=lfs merge=lfs -text
92
+ gradient_32experts+moe/expert_20_iter_200000 filter=lfs diff=lfs merge=lfs -text
93
+ gradient_32experts+moe/expert_21_iter_200000 filter=lfs diff=lfs merge=lfs -text
94
+ gradient_32experts+moe/expert_22_iter_200000 filter=lfs diff=lfs merge=lfs -text
95
+ gradient_32experts+moe/expert_23_iter_200000 filter=lfs diff=lfs merge=lfs -text
96
+ gradient_32experts+moe/expert_24_iter_200000 filter=lfs diff=lfs merge=lfs -text
97
+ gradient_32experts+moe/expert_25_iter_200000 filter=lfs diff=lfs merge=lfs -text
98
+ gradient_32experts+moe/expert_26_iter_200000 filter=lfs diff=lfs merge=lfs -text
99
+ gradient_32experts+moe/expert_27_iter_200000 filter=lfs diff=lfs merge=lfs -text
100
+ gradient_32experts+moe/expert_28_iter_200000 filter=lfs diff=lfs merge=lfs -text
101
+ gradient_32experts+moe/expert_29_iter_200000 filter=lfs diff=lfs merge=lfs -text
102
+ gradient_32experts+moe/expert_3_iter_200000 filter=lfs diff=lfs merge=lfs -text
103
+ gradient_32experts+moe/expert_30_iter_200000 filter=lfs diff=lfs merge=lfs -text
104
+ gradient_32experts+moe/expert_31_iter_200000 filter=lfs diff=lfs merge=lfs -text
105
+ gradient_32experts+moe/expert_4_iter_200000 filter=lfs diff=lfs merge=lfs -text
106
+ gradient_32experts+moe/expert_5_iter_200000 filter=lfs diff=lfs merge=lfs -text
107
+ gradient_32experts+moe/expert_6_iter_200000 filter=lfs diff=lfs merge=lfs -text
108
+ gradient_32experts+moe/expert_7_iter_200000 filter=lfs diff=lfs merge=lfs -text
109
+ gradient_32experts+moe/expert_8_iter_200000 filter=lfs diff=lfs merge=lfs -text
110
+ gradient_32experts+moe/expert_9_iter_200000 filter=lfs diff=lfs merge=lfs -text
111
+ gradient_32experts+moe/moe__iter_400000 filter=lfs diff=lfs merge=lfs -text
112
+ gradient_40experts+moe/expert_0_iter_200000 filter=lfs diff=lfs merge=lfs -text
113
+ gradient_40experts+moe/expert_1_iter_200000 filter=lfs diff=lfs merge=lfs -text
114
+ gradient_40experts+moe/expert_10_iter_200000 filter=lfs diff=lfs merge=lfs -text
115
+ gradient_40experts+moe/expert_11_iter_200000 filter=lfs diff=lfs merge=lfs -text
116
+ gradient_40experts+moe/expert_12_iter_200000 filter=lfs diff=lfs merge=lfs -text
117
+ gradient_40experts+moe/expert_13_iter_200000 filter=lfs diff=lfs merge=lfs -text
118
+ gradient_40experts+moe/expert_14_iter_200000 filter=lfs diff=lfs merge=lfs -text
119
+ gradient_40experts+moe/expert_15_iter_200000 filter=lfs diff=lfs merge=lfs -text
120
+ gradient_40experts+moe/expert_16_iter_200000 filter=lfs diff=lfs merge=lfs -text
121
+ gradient_40experts+moe/expert_17_iter_200000 filter=lfs diff=lfs merge=lfs -text
122
+ gradient_40experts+moe/expert_18_iter_200000 filter=lfs diff=lfs merge=lfs -text
123
+ gradient_40experts+moe/expert_19_iter_200000 filter=lfs diff=lfs merge=lfs -text
124
+ gradient_40experts+moe/expert_2_iter_200000 filter=lfs diff=lfs merge=lfs -text
125
+ gradient_40experts+moe/expert_20_iter_200000 filter=lfs diff=lfs merge=lfs -text
126
+ gradient_40experts+moe/expert_21_iter_200000 filter=lfs diff=lfs merge=lfs -text
127
+ gradient_40experts+moe/expert_22_iter_200000 filter=lfs diff=lfs merge=lfs -text
128
+ gradient_40experts+moe/expert_23_iter_200000 filter=lfs diff=lfs merge=lfs -text
129
+ gradient_40experts+moe/expert_24_iter_200000 filter=lfs diff=lfs merge=lfs -text
130
+ gradient_40experts+moe/expert_25_iter_200000 filter=lfs diff=lfs merge=lfs -text
131
+ gradient_40experts+moe/expert_26_iter_200000 filter=lfs diff=lfs merge=lfs -text
132
+ gradient_40experts+moe/expert_27_iter_200000 filter=lfs diff=lfs merge=lfs -text
133
+ gradient_40experts+moe/expert_28_iter_200000 filter=lfs diff=lfs merge=lfs -text
134
+ gradient_40experts+moe/expert_29_iter_200000 filter=lfs diff=lfs merge=lfs -text
135
+ gradient_40experts+moe/expert_3_iter_200000 filter=lfs diff=lfs merge=lfs -text
136
+ gradient_40experts+moe/expert_30_iter_200000 filter=lfs diff=lfs merge=lfs -text
137
+ gradient_40experts+moe/expert_31_iter_200000 filter=lfs diff=lfs merge=lfs -text
138
+ gradient_40experts+moe/expert_32_iter_200000 filter=lfs diff=lfs merge=lfs -text
139
+ gradient_40experts+moe/expert_33_iter_200000 filter=lfs diff=lfs merge=lfs -text
140
+ gradient_40experts+moe/expert_34_iter_200000 filter=lfs diff=lfs merge=lfs -text
141
+ gradient_40experts+moe/expert_35_iter_200000 filter=lfs diff=lfs merge=lfs -text
142
+ gradient_40experts+moe/expert_36_iter_200000 filter=lfs diff=lfs merge=lfs -text
143
+ gradient_40experts+moe/expert_37_iter_200000 filter=lfs diff=lfs merge=lfs -text
144
+ gradient_40experts+moe/expert_38_iter_200000 filter=lfs diff=lfs merge=lfs -text
145
+ gradient_40experts+moe/expert_39_iter_200000 filter=lfs diff=lfs merge=lfs -text
146
+ gradient_40experts+moe/expert_4_iter_200000 filter=lfs diff=lfs merge=lfs -text
147
+ gradient_40experts+moe/expert_5_iter_200000 filter=lfs diff=lfs merge=lfs -text
148
+ gradient_40experts+moe/expert_6_iter_200000 filter=lfs diff=lfs merge=lfs -text
149
+ gradient_40experts+moe/expert_7_iter_200000 filter=lfs diff=lfs merge=lfs -text
150
+ gradient_40experts+moe/expert_8_iter_200000 filter=lfs diff=lfs merge=lfs -text
151
+ gradient_40experts+moe/expert_9_iter_200000 filter=lfs diff=lfs merge=lfs -text
152
+ gradient_40experts+moe/moe__iter_400000 filter=lfs diff=lfs merge=lfs -text
153
+ gradient_48experts+moe/expert_0_iter_200000 filter=lfs diff=lfs merge=lfs -text
154
+ gradient_48experts+moe/expert_1_iter_200000 filter=lfs diff=lfs merge=lfs -text
155
+ gradient_48experts+moe/expert_10_iter_200000 filter=lfs diff=lfs merge=lfs -text
156
+ gradient_48experts+moe/expert_11_iter_200000 filter=lfs diff=lfs merge=lfs -text
157
+ gradient_48experts+moe/expert_12_iter_200000 filter=lfs diff=lfs merge=lfs -text
158
+ gradient_48experts+moe/expert_13_iter_200000 filter=lfs diff=lfs merge=lfs -text
159
+ gradient_48experts+moe/expert_14_iter_200000 filter=lfs diff=lfs merge=lfs -text
160
+ gradient_48experts+moe/expert_15_iter_200000 filter=lfs diff=lfs merge=lfs -text
161
+ gradient_48experts+moe/expert_16_iter_200000 filter=lfs diff=lfs merge=lfs -text
162
+ gradient_48experts+moe/expert_17_iter_200000 filter=lfs diff=lfs merge=lfs -text
163
+ gradient_48experts+moe/expert_18_iter_200000 filter=lfs diff=lfs merge=lfs -text
164
+ gradient_48experts+moe/expert_19_iter_200000 filter=lfs diff=lfs merge=lfs -text
165
+ gradient_48experts+moe/expert_2_iter_200000 filter=lfs diff=lfs merge=lfs -text
166
+ gradient_48experts+moe/expert_20_iter_200000 filter=lfs diff=lfs merge=lfs -text
167
+ gradient_48experts+moe/expert_21_iter_200000 filter=lfs diff=lfs merge=lfs -text
168
+ gradient_48experts+moe/expert_22_iter_200000 filter=lfs diff=lfs merge=lfs -text
169
+ gradient_48experts+moe/expert_23_iter_200000 filter=lfs diff=lfs merge=lfs -text
170
+ gradient_48experts+moe/expert_24_iter_200000 filter=lfs diff=lfs merge=lfs -text
171
+ gradient_48experts+moe/expert_25_iter_200000 filter=lfs diff=lfs merge=lfs -text
172
+ gradient_48experts+moe/expert_26_iter_200000 filter=lfs diff=lfs merge=lfs -text
173
+ gradient_48experts+moe/expert_27_iter_200000 filter=lfs diff=lfs merge=lfs -text
174
+ gradient_48experts+moe/expert_28_iter_200000 filter=lfs diff=lfs merge=lfs -text
175
+ gradient_48experts+moe/expert_29_iter_200000 filter=lfs diff=lfs merge=lfs -text
176
+ gradient_48experts+moe/expert_3_iter_200000 filter=lfs diff=lfs merge=lfs -text
177
+ gradient_48experts+moe/expert_30_iter_200000 filter=lfs diff=lfs merge=lfs -text
178
+ gradient_48experts+moe/expert_31_iter_200000 filter=lfs diff=lfs merge=lfs -text
179
+ gradient_48experts+moe/expert_32_iter_200000 filter=lfs diff=lfs merge=lfs -text
180
+ gradient_48experts+moe/expert_33_iter_200000 filter=lfs diff=lfs merge=lfs -text
181
+ gradient_48experts+moe/expert_34_iter_200000 filter=lfs diff=lfs merge=lfs -text
182
+ gradient_48experts+moe/expert_35_iter_200000 filter=lfs diff=lfs merge=lfs -text
183
+ gradient_48experts+moe/expert_36_iter_200000 filter=lfs diff=lfs merge=lfs -text
184
+ gradient_48experts+moe/expert_37_iter_200000 filter=lfs diff=lfs merge=lfs -text
185
+ gradient_48experts+moe/expert_38_iter_200000 filter=lfs diff=lfs merge=lfs -text
186
+ gradient_48experts+moe/expert_39_iter_200000 filter=lfs diff=lfs merge=lfs -text
187
+ gradient_48experts+moe/expert_4_iter_200000 filter=lfs diff=lfs merge=lfs -text
188
+ gradient_48experts+moe/expert_40_iter_200000 filter=lfs diff=lfs merge=lfs -text
189
+ gradient_48experts+moe/expert_41_iter_200000 filter=lfs diff=lfs merge=lfs -text
190
+ gradient_48experts+moe/expert_42_iter_200000 filter=lfs diff=lfs merge=lfs -text
191
+ gradient_48experts+moe/expert_43_iter_200000 filter=lfs diff=lfs merge=lfs -text
192
+ gradient_48experts+moe/expert_44_iter_200000 filter=lfs diff=lfs merge=lfs -text
193
+ gradient_48experts+moe/expert_45_iter_200000 filter=lfs diff=lfs merge=lfs -text
194
+ gradient_48experts+moe/expert_46_iter_200000 filter=lfs diff=lfs merge=lfs -text
195
+ gradient_48experts+moe/expert_47_iter_200000 filter=lfs diff=lfs merge=lfs -text
196
+ gradient_48experts+moe/expert_5_iter_200000 filter=lfs diff=lfs merge=lfs -text
197
+ gradient_48experts+moe/expert_6_iter_200000 filter=lfs diff=lfs merge=lfs -text
198
+ gradient_48experts+moe/expert_7_iter_200000 filter=lfs diff=lfs merge=lfs -text
199
+ gradient_48experts+moe/expert_8_iter_200000 filter=lfs diff=lfs merge=lfs -text
200
+ gradient_48experts+moe/expert_9_iter_200000 filter=lfs diff=lfs merge=lfs -text
201
+ gradient_48experts+moe/moe__iter_400000 filter=lfs diff=lfs merge=lfs -text
202
+ gradient_8experts+moe/expert_0_iter_200000 filter=lfs diff=lfs merge=lfs -text
203
+ gradient_8experts+moe/expert_1_iter_200000 filter=lfs diff=lfs merge=lfs -text
204
+ gradient_8experts+moe/expert_2_iter_200000 filter=lfs diff=lfs merge=lfs -text
205
+ gradient_8experts+moe/expert_3_iter_200000 filter=lfs diff=lfs merge=lfs -text
206
+ gradient_8experts+moe/expert_4_iter_200000 filter=lfs diff=lfs merge=lfs -text
207
+ gradient_8experts+moe/expert_5_iter_200000 filter=lfs diff=lfs merge=lfs -text
208
+ gradient_8experts+moe/expert_6_iter_200000 filter=lfs diff=lfs merge=lfs -text
209
+ gradient_8experts+moe/expert_7_iter_200000 filter=lfs diff=lfs merge=lfs -text
210
+ gradient_8experts+moe/moe__iter_400000 filter=lfs diff=lfs merge=lfs -text
backbone/prompt_model_mt160_5M_iter_400000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08003ee29180fa01769c6148eced5e60e512909886a864a620d85157da79ad67
3
+ size 27494207
gradient_16experts+moe/expert_0_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:924c1ce5d2e31c1b620149c939a4518ed0463042004bb13257f1b8d6c3db291c
3
+ size 12622266
gradient_16experts+moe/expert_10_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8dc75f9c2bf12e793305a95736b50e939d9a249c80c6ec922c4ec3f559f1856
3
+ size 12622294
gradient_16experts+moe/expert_11_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd02092ef7d6704bcbb7ef25d786750684f61d6cc716d8b19bb770ce8613fa8
3
+ size 12622294
gradient_16experts+moe/expert_12_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85bdc271c598f92355cfe6a71bbe5ff74805f672aa542c825f157ad9d273c52b
3
+ size 12622294
gradient_16experts+moe/expert_13_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b904b154ac61be6b23c9e4bbb9904fa69e3475f86bbf62da7684f8c6c129aabb
3
+ size 12622294
gradient_16experts+moe/expert_14_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9468018defba2e54f4298da7602220952c6b72caceab547ae0bc379ef6e549b0
3
+ size 12622294
gradient_16experts+moe/expert_15_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af0c1f90b85d825d2549bdca3dad1bedffd800e5422292f49b9267b6ccd8eac
3
+ size 12622294
gradient_16experts+moe/expert_1_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9003b8a3ff5e45c62c2a65db02309e0d5c281dc4c425018d7534a686e637bbb1
3
+ size 12622266
gradient_16experts+moe/expert_2_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3628d62a1b23fee338aa094fba1924fccd62a1ce1cf4022842999f6f929c855
3
+ size 12622266
gradient_16experts+moe/expert_3_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff110f69ea304e2c8762d0200fd30669b464dd3748c34ce66c5a45d0c362142
3
+ size 12622266
gradient_16experts+moe/expert_4_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3e8f7cd6797890691f314e8c8b2ff717a764724bec469775ef71eeb67602cf
3
+ size 12622266
gradient_16experts+moe/expert_5_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed0d938c5b06092def16387fc65dba5a272473de4d1500a12ec62385198e0cdd
3
+ size 12622266
gradient_16experts+moe/expert_6_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a3a035657c142a205ecbf22d014bda0fe32e5e8a614272b69322bcfd1856ca
3
+ size 12622266
gradient_16experts+moe/expert_7_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ec41cbc1cefdbde381100c38629001330fb5cd695a974530d5c746a201a6bb7
3
+ size 12622266
gradient_16experts+moe/expert_8_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c03bc6b818dc344e5fe00cc32d3bdf22dff785008448b556ba5a4a1ed7042c8
3
+ size 12622266
gradient_16experts+moe/expert_9_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c89433f39cd3498b5f01928c9ff3ed6eb965e0dff9a769fe2a0b6ced9fd35cb
3
+ size 12622266
gradient_16experts+moe/moe__iter_400000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9061eebcc93071865376a16b119ab2b757279eaf668d377260a814a29abecb94
3
+ size 271466810
gradient_24experts+moe/expert_0_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6132688bbd02201bf5742dbf40edb1bb1170a743bea5b2519776b57e53c5126
3
+ size 12622266
gradient_24experts+moe/expert_10_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbe6eb366a4f9eef3ca97163d439f3208129fec96b0376330d8a6b96c9044997
3
+ size 12622294
gradient_24experts+moe/expert_11_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cc20850b0d707e1fa092666e53da5c92f43039ada98ba76b8786f44794076a0
3
+ size 12622294
gradient_24experts+moe/expert_12_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85bdc271c598f92355cfe6a71bbe5ff74805f672aa542c825f157ad9d273c52b
3
+ size 12622294
gradient_24experts+moe/expert_13_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b904b154ac61be6b23c9e4bbb9904fa69e3475f86bbf62da7684f8c6c129aabb
3
+ size 12622294
gradient_24experts+moe/expert_14_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1adee1efc2b999923de040b8858ba2c1293fc8e211f9fd340102c93f60bf8786
3
+ size 12622294
gradient_24experts+moe/expert_15_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caa492eda8e6a4ec85e36db2cecf67367d015d6f01359cdf85263e34048a9303
3
+ size 12622294
gradient_24experts+moe/expert_16_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab388e402173ca620131170fd8adc1c157d6b629457c9de743becba0d65a2038
3
+ size 12622294
gradient_24experts+moe/expert_17_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aae7bcfd4daf41262c2987acebc850549db91a605b85bcdd4abed57276c2c3a
3
+ size 12622294
gradient_24experts+moe/expert_18_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528a6468ff3c7ba3c0dc032cf48814c41a177f6c493b7e5ec55d1120c4136537
3
+ size 12622294
gradient_24experts+moe/expert_19_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37c68f854d0af51ee415aba4222ff89cc8256fefaf96382a51a3b4c83c18a59
3
+ size 12622294
gradient_24experts+moe/expert_1_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb0af6b75ffe52c9f922336dfada4056350c835a26f3b387c23f411a9d957f9
3
+ size 12622266
gradient_24experts+moe/expert_20_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d108b3d041bcdc31750f64b37675dd494d333a9082e6da2ac2f0690d3bf67546
3
+ size 12622294
gradient_24experts+moe/expert_21_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:958afabb6923bfa74da954b02fcf55324bfa7262c61d2ea3f4fc524578764834
3
+ size 12622294
gradient_24experts+moe/expert_22_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd2329f4acb149c566772570b7a422123d500e8b3513497625713fa4eea7c86
3
+ size 12622294
gradient_24experts+moe/expert_23_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:292ff81c71f70ffdfd883f6a2f49bc4f3e1218bd9d639f6e12dc6c95a8c964fe
3
+ size 12622294
gradient_24experts+moe/expert_2_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3628d62a1b23fee338aa094fba1924fccd62a1ce1cf4022842999f6f929c855
3
+ size 12622266
gradient_24experts+moe/expert_3_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff110f69ea304e2c8762d0200fd30669b464dd3748c34ce66c5a45d0c362142
3
+ size 12622266
gradient_24experts+moe/expert_4_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a015fb668a030cbddd1a08b32368ec64fac613a4b436858b7c887d63d810f45
3
+ size 12622266
gradient_24experts+moe/expert_5_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c16ceb486404d9fbbf3da7b6c6600f19f5f14d06cd621783f0d59fcb19690475
3
+ size 12622266
gradient_24experts+moe/expert_6_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3513b14f36d63c233c0a62f121a63430b7592d47d5502830a62a6c28d8d77876
3
+ size 12622266
gradient_24experts+moe/expert_7_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71976c0974860812d4db6288b0764aa40770ae8e222bac25fcb2dc0517ef2241
3
+ size 12622266
gradient_24experts+moe/expert_8_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fa0fbe3aca040626716e64a5925335037d21e979be79f1b93500f3e383f4d7f
3
+ size 12622266
gradient_24experts+moe/expert_9_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df548440ead86fccd2f9af201a44b2922ea1089c5f86c2a6d21283e4d8d8a8a3
3
+ size 12622266
gradient_24experts+moe/moe__iter_400000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b59ae9cb83e093c2f2c2a917607038615bc3ac05420fe40b46ff6687012dfe28
3
+ size 372543674
gradient_32experts+moe/expert_0_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21b4afb2e7bc2477ce046be0b47bcbc66bdcb2065edad4d33d8cceaf67494fa
3
+ size 12622266
gradient_32experts+moe/expert_10_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc4216bdbc40dd8acde232f8e0dd433e4926e3f8e16124ad4b94e575984c4bc
3
+ size 12622294
gradient_32experts+moe/expert_11_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e5b83e3706f7ed581ad0cd0d1a659961734b7600b1331c06499a4f4a9383f07
3
+ size 12622294
gradient_32experts+moe/expert_12_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daff9d946380ab2e5504f87501529090c2dfd420772846207909da8505c7c49a
3
+ size 12622294
gradient_32experts+moe/expert_13_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:811633a03bfbe16c7d2a9cd6de1eff55e1d23f6d546d7cb6e4dce5f8b73039e6
3
+ size 12622294
gradient_32experts+moe/expert_14_iter_200000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c4c6b4fd904a7be879a1808498f9af572234efeac255e4b2f187146d75427cd
3
+ size 12622294