agentbot commited on
Commit
b0c65a4
·
verified ·
1 Parent(s): 16e34a3

Initial commit with folder contents

Browse files
.gitattributes CHANGED
@@ -33,3 +33,329 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ onnx/up_blocks.0/onnx__MatMul_6936 filter=lfs diff=lfs merge=lfs -text
37
+ onnx/up_blocks.0/onnx__MatMul_6920 filter=lfs diff=lfs merge=lfs -text
38
+ onnx/up_blocks.0/onnx__MatMul_6912 filter=lfs diff=lfs merge=lfs -text
39
+ onnx/up_blocks.0/onnx__MatMul_7600 filter=lfs diff=lfs merge=lfs -text
40
+ onnx/up_blocks.0/onnx__MatMul_6955 filter=lfs diff=lfs merge=lfs -text
41
+ onnx/up_blocks.0/onnx__MatMul_6958 filter=lfs diff=lfs merge=lfs -text
42
+ onnx/up_blocks.0/resnets.0.time_emb_proj.weight filter=lfs diff=lfs merge=lfs -text
43
+ onnx/up_blocks.0/onnx__MatMul_7203 filter=lfs diff=lfs merge=lfs -text
44
+ onnx/up_blocks.0/onnx__MatMul_7624 filter=lfs diff=lfs merge=lfs -text
45
+ onnx/up_blocks.0/onnx__MatMul_6910 filter=lfs diff=lfs merge=lfs -text
46
+ onnx/up_blocks.0/onnx__MatMul_6969 filter=lfs diff=lfs merge=lfs -text
47
+ onnx/up_blocks.0/onnx__MatMul_6909 filter=lfs diff=lfs merge=lfs -text
48
+ onnx/up_blocks.0/onnx__MatMul_6911 filter=lfs diff=lfs merge=lfs -text
49
+ onnx/up_blocks.0/onnx__MatMul_6935 filter=lfs diff=lfs merge=lfs -text
50
+ onnx/up_blocks.0/onnx__MatMul_7003 filter=lfs diff=lfs merge=lfs -text
51
+ onnx/up_blocks.0/onnx__MatMul_6979 filter=lfs diff=lfs merge=lfs -text
52
+ onnx/up_blocks.0/resnets.2.time_emb_proj.weight filter=lfs diff=lfs merge=lfs -text
53
+ onnx/up_blocks.0/onnx__MatMul_7193 filter=lfs diff=lfs merge=lfs -text
54
+ onnx/up_blocks.0/onnx__MatMul_6921 filter=lfs diff=lfs merge=lfs -text
55
+ onnx/up_blocks.0/onnx__MatMul_7017 filter=lfs diff=lfs merge=lfs -text
56
+ onnx/up_blocks.0/onnx__MatMul_7609 filter=lfs diff=lfs merge=lfs -text
57
+ onnx/up_blocks.0/onnx__MatMul_6982 filter=lfs diff=lfs merge=lfs -text
58
+ onnx/up_blocks.0/onnx__MatMul_7169 filter=lfs diff=lfs merge=lfs -text
59
+ onnx/up_blocks.0/onnx__MatMul_6984 filter=lfs diff=lfs merge=lfs -text
60
+ onnx/up_blocks.0/onnx__MatMul_7182 filter=lfs diff=lfs merge=lfs -text
61
+ onnx/up_blocks.0/onnx__MatMul_7031 filter=lfs diff=lfs merge=lfs -text
62
+ onnx/up_blocks.0/onnx__MatMul_7041 filter=lfs diff=lfs merge=lfs -text
63
+ onnx/up_blocks.0/onnx__MatMul_6934 filter=lfs diff=lfs merge=lfs -text
64
+ onnx/up_blocks.0/onnx__MatMul_7619 filter=lfs diff=lfs merge=lfs -text
65
+ onnx/up_blocks.0/onnx__MatMul_6931 filter=lfs diff=lfs merge=lfs -text
66
+ onnx/up_blocks.0/onnx__MatMul_6968 filter=lfs diff=lfs merge=lfs -text
67
+ onnx/up_blocks.0/onnx__MatMul_7160 filter=lfs diff=lfs merge=lfs -text
68
+ onnx/up_blocks.0/onnx__MatMul_7008 filter=lfs diff=lfs merge=lfs -text
69
+ onnx/up_blocks.0/onnx__MatMul_6945 filter=lfs diff=lfs merge=lfs -text
70
+ onnx/up_blocks.0/onnx__MatMul_7632 filter=lfs diff=lfs merge=lfs -text
71
+ onnx/up_blocks.0/onnx__MatMul_7168 filter=lfs diff=lfs merge=lfs -text
72
+ onnx/up_blocks.0/onnx__MatMul_7007 filter=lfs diff=lfs merge=lfs -text
73
+ onnx/up_blocks.0/onnx__MatMul_7030 filter=lfs diff=lfs merge=lfs -text
74
+ onnx/up_blocks.0/resnets.1.time_emb_proj.weight filter=lfs diff=lfs merge=lfs -text
75
+ onnx/up_blocks.0/onnx__MatMul_7103 filter=lfs diff=lfs merge=lfs -text
76
+ onnx/up_blocks.0/onnx__MatMul_7040 filter=lfs diff=lfs merge=lfs -text
77
+ onnx/up_blocks.0/onnx__MatMul_7065 filter=lfs diff=lfs merge=lfs -text
78
+ onnx/up_blocks.0/onnx__MatMul_7157 filter=lfs diff=lfs merge=lfs -text
79
+ onnx/up_blocks.0/onnx__MatMul_7055 filter=lfs diff=lfs merge=lfs -text
80
+ onnx/up_blocks.0/onnx__MatMul_7006 filter=lfs diff=lfs merge=lfs -text
81
+ onnx/up_blocks.0/onnx__MatMul_7089 filter=lfs diff=lfs merge=lfs -text
82
+ onnx/up_blocks.0/onnx__MatMul_6993 filter=lfs diff=lfs merge=lfs -text
83
+ onnx/up_blocks.0/resnets.2.conv_shortcut.weight filter=lfs diff=lfs merge=lfs -text
84
+ onnx/up_blocks.0/onnx__MatMul_7054 filter=lfs diff=lfs merge=lfs -text
85
+ onnx/up_blocks.0/onnx__MatMul_7099 filter=lfs diff=lfs merge=lfs -text
86
+ onnx/up_blocks.0/onnx__MatMul_7158 filter=lfs diff=lfs merge=lfs -text
87
+ onnx/up_blocks.0/onnx__MatMul_7137 filter=lfs diff=lfs merge=lfs -text
88
+ onnx/up_blocks.0/onnx__MatMul_7027 filter=lfs diff=lfs merge=lfs -text
89
+ onnx/up_blocks.0/onnx__MatMul_7147 filter=lfs diff=lfs merge=lfs -text
90
+ onnx/up_blocks.0/onnx__MatMul_7112 filter=lfs diff=lfs merge=lfs -text
91
+ onnx/up_blocks.0/onnx__MatMul_7128 filter=lfs diff=lfs merge=lfs -text
92
+ onnx/up_blocks.0/onnx__MatMul_7104 filter=lfs diff=lfs merge=lfs -text
93
+ onnx/up_blocks.0/onnx__MatMul_7016 filter=lfs diff=lfs merge=lfs -text
94
+ onnx/up_blocks.0/onnx__MatMul_6960 filter=lfs diff=lfs merge=lfs -text
95
+ onnx/up_blocks.0/onnx__MatMul_6994 filter=lfs diff=lfs merge=lfs -text
96
+ onnx/up_blocks.0/onnx__MatMul_7126 filter=lfs diff=lfs merge=lfs -text
97
+ onnx/up_blocks.0/onnx__MatMul_7018 filter=lfs diff=lfs merge=lfs -text
98
+ onnx/up_blocks.0/onnx__MatMul_6922 filter=lfs diff=lfs merge=lfs -text
99
+ onnx/up_blocks.0/onnx__MatMul_6970 filter=lfs diff=lfs merge=lfs -text
100
+ onnx/up_blocks.0/onnx__MatMul_7611 filter=lfs diff=lfs merge=lfs -text
101
+ onnx/up_blocks.0/onnx__MatMul_7042 filter=lfs diff=lfs merge=lfs -text
102
+ onnx/up_blocks.0/onnx__MatMul_6947 filter=lfs diff=lfs merge=lfs -text
103
+ onnx/up_blocks.0/onnx__MatMul_7019 filter=lfs diff=lfs merge=lfs -text
104
+ onnx/up_blocks.0/onnx__MatMul_7634 filter=lfs diff=lfs merge=lfs -text
105
+ onnx/up_blocks.0/onnx__MatMul_7192 filter=lfs diff=lfs merge=lfs -text
106
+ onnx/up_blocks.0/onnx__MatMul_6971 filter=lfs diff=lfs merge=lfs -text
107
+ onnx/up_blocks.0/resnets.0.conv_shortcut.weight filter=lfs diff=lfs merge=lfs -text
108
+ onnx/up_blocks.0/onnx__MatMul_7633 filter=lfs diff=lfs merge=lfs -text
109
+ onnx/up_blocks.0/onnx__MatMul_7090 filter=lfs diff=lfs merge=lfs -text
110
+ onnx/up_blocks.0/onnx__MatMul_7241 filter=lfs diff=lfs merge=lfs -text
111
+ onnx/up_blocks.0/onnx__MatMul_7032 filter=lfs diff=lfs merge=lfs -text
112
+ onnx/up_blocks.0/onnx__MatMul_6995 filter=lfs diff=lfs merge=lfs -text
113
+ onnx/up_blocks.0/onnx__MatMul_6946 filter=lfs diff=lfs merge=lfs -text
114
+ onnx/up_blocks.0/onnx__MatMul_7227 filter=lfs diff=lfs merge=lfs -text
115
+ onnx/up_blocks.0/onnx__MatMul_7635 filter=lfs diff=lfs merge=lfs -text
116
+ onnx/up_blocks.0/onnx__MatMul_7232 filter=lfs diff=lfs merge=lfs -text
117
+ onnx/up_blocks.0/onnx__MatMul_7043 filter=lfs diff=lfs merge=lfs -text
118
+ onnx/up_blocks.0/onnx__MatMul_7136 filter=lfs diff=lfs merge=lfs -text
119
+ onnx/up_blocks.0/onnx__MatMul_7587 filter=lfs diff=lfs merge=lfs -text
120
+ onnx/up_blocks.0/onnx__MatMul_7127 filter=lfs diff=lfs merge=lfs -text
121
+ onnx/up_blocks.0/onnx__MatMul_7088 filter=lfs diff=lfs merge=lfs -text
122
+ onnx/up_blocks.0/onnx__MatMul_7254 filter=lfs diff=lfs merge=lfs -text
123
+ onnx/up_blocks.0/onnx__MatMul_6923 filter=lfs diff=lfs merge=lfs -text
124
+ onnx/up_blocks.0/onnx__MatMul_7080 filter=lfs diff=lfs merge=lfs -text
125
+ onnx/up_blocks.0/onnx__MatMul_7056 filter=lfs diff=lfs merge=lfs -text
126
+ onnx/up_blocks.0/onnx__MatMul_7123 filter=lfs diff=lfs merge=lfs -text
127
+ onnx/up_blocks.0/onnx__MatMul_7075 filter=lfs diff=lfs merge=lfs -text
128
+ onnx/up_blocks.0/resnets.1.conv_shortcut.weight filter=lfs diff=lfs merge=lfs -text
129
+ onnx/up_blocks.0/onnx__MatMul_7064 filter=lfs diff=lfs merge=lfs -text
130
+ onnx/up_blocks.0/onnx__MatMul_7051 filter=lfs diff=lfs merge=lfs -text
131
+ onnx/up_blocks.0/onnx__MatMul_7079 filter=lfs diff=lfs merge=lfs -text
132
+ onnx/up_blocks.0/onnx__MatMul_7347 filter=lfs diff=lfs merge=lfs -text
133
+ onnx/up_blocks.0/onnx__MatMul_7371 filter=lfs diff=lfs merge=lfs -text
134
+ onnx/up_blocks.0/onnx__MatMul_7102 filter=lfs diff=lfs merge=lfs -text
135
+ onnx/up_blocks.0/onnx__MatMul_7078 filter=lfs diff=lfs merge=lfs -text
136
+ onnx/up_blocks.0/onnx__MatMul_7336 filter=lfs diff=lfs merge=lfs -text
137
+ onnx/up_blocks.0/onnx__MatMul_7360 filter=lfs diff=lfs merge=lfs -text
138
+ onnx/up_blocks.0/onnx__MatMul_7361 filter=lfs diff=lfs merge=lfs -text
139
+ onnx/up_blocks.0/onnx__MatMul_7179 filter=lfs diff=lfs merge=lfs -text
140
+ onnx/up_blocks.0/onnx__MatMul_7159 filter=lfs diff=lfs merge=lfs -text
141
+ onnx/up_blocks.0/onnx__MatMul_7184 filter=lfs diff=lfs merge=lfs -text
142
+ onnx/up_blocks.0/onnx__MatMul_7113 filter=lfs diff=lfs merge=lfs -text
143
+ onnx/up_blocks.0/onnx__MatMul_7328 filter=lfs diff=lfs merge=lfs -text
144
+ onnx/up_blocks.0/onnx__MatMul_7376 filter=lfs diff=lfs merge=lfs -text
145
+ onnx/up_blocks.0/onnx__MatMul_7337 filter=lfs diff=lfs merge=lfs -text
146
+ onnx/up_blocks.0/onnx__MatMul_7115 filter=lfs diff=lfs merge=lfs -text
147
+ onnx/up_blocks.0/onnx__MatMul_7171 filter=lfs diff=lfs merge=lfs -text
148
+ onnx/up_blocks.0/onnx__MatMul_7407 filter=lfs diff=lfs merge=lfs -text
149
+ onnx/up_blocks.0/onnx__MatMul_7256 filter=lfs diff=lfs merge=lfs -text
150
+ onnx/up_blocks.0/onnx__MatMul_7216 filter=lfs diff=lfs merge=lfs -text
151
+ onnx/up_blocks.0/onnx__MatMul_7326 filter=lfs diff=lfs merge=lfs -text
152
+ onnx/up_blocks.0/onnx__MatMul_7251 filter=lfs diff=lfs merge=lfs -text
153
+ onnx/up_blocks.0/onnx__MatMul_7208 filter=lfs diff=lfs merge=lfs -text
154
+ onnx/up_blocks.0/onnx__MatMul_7303 filter=lfs diff=lfs merge=lfs -text
155
+ onnx/up_blocks.0/onnx__MatMul_7217 filter=lfs diff=lfs merge=lfs -text
156
+ onnx/up_blocks.0/onnx__MatMul_7350 filter=lfs diff=lfs merge=lfs -text
157
+ onnx/up_blocks.0/onnx__MatMul_7231 filter=lfs diff=lfs merge=lfs -text
158
+ onnx/up_blocks.0/onnx__MatMul_7183 filter=lfs diff=lfs merge=lfs -text
159
+ onnx/up_blocks.0/onnx__MatMul_7265 filter=lfs diff=lfs merge=lfs -text
160
+ onnx/up_blocks.0/onnx__MatMul_7240 filter=lfs diff=lfs merge=lfs -text
161
+ onnx/up_blocks.0/onnx__MatMul_7206 filter=lfs diff=lfs merge=lfs -text
162
+ onnx/up_blocks.0/onnx__MatMul_7323 filter=lfs diff=lfs merge=lfs -text
163
+ onnx/up_blocks.0/onnx__MatMul_7207 filter=lfs diff=lfs merge=lfs -text
164
+ onnx/up_blocks.0/onnx__MatMul_7289 filter=lfs diff=lfs merge=lfs -text
165
+ onnx/up_blocks.0/onnx__MatMul_7091 filter=lfs diff=lfs merge=lfs -text
166
+ onnx/up_blocks.0/onnx__MatMul_7230 filter=lfs diff=lfs merge=lfs -text
167
+ onnx/up_blocks.0/onnx__MatMul_7066 filter=lfs diff=lfs merge=lfs -text
168
+ onnx/up_blocks.0/onnx__MatMul_6944 filter=lfs diff=lfs merge=lfs -text
169
+ onnx/up_blocks.0/onnx__MatMul_7067 filter=lfs diff=lfs merge=lfs -text
170
+ onnx/up_blocks.0/onnx__MatMul_7290 filter=lfs diff=lfs merge=lfs -text
171
+ onnx/up_blocks.0/onnx__MatMul_6983 filter=lfs diff=lfs merge=lfs -text
172
+ onnx/up_blocks.0/onnx__MatMul_7139 filter=lfs diff=lfs merge=lfs -text
173
+ onnx/up_blocks.0/onnx__MatMul_7302 filter=lfs diff=lfs merge=lfs -text
174
+ onnx/up_blocks.0/onnx__MatMul_6992 filter=lfs diff=lfs merge=lfs -text
175
+ onnx/up_blocks.0/onnx__MatMul_7315 filter=lfs diff=lfs merge=lfs -text
176
+ onnx/up_blocks.0/onnx__MatMul_6959 filter=lfs diff=lfs merge=lfs -text
177
+ onnx/up_blocks.0/onnx__MatMul_7299 filter=lfs diff=lfs merge=lfs -text
178
+ onnx/up_blocks.0/onnx__MatMul_7386 filter=lfs diff=lfs merge=lfs -text
179
+ onnx/up_blocks.0/onnx__MatMul_7138 filter=lfs diff=lfs merge=lfs -text
180
+ onnx/up_blocks.0/onnx__MatMul_7363 filter=lfs diff=lfs merge=lfs -text
181
+ onnx/up_blocks.0/onnx__MatMul_7312 filter=lfs diff=lfs merge=lfs -text
182
+ onnx/up_blocks.0/onnx__MatMul_7114 filter=lfs diff=lfs merge=lfs -text
183
+ onnx/up_blocks.0/onnx__MatMul_7219 filter=lfs diff=lfs merge=lfs -text
184
+ onnx/up_blocks.0/onnx__MatMul_7150 filter=lfs diff=lfs merge=lfs -text
185
+ onnx/up_blocks.0/onnx__MatMul_7195 filter=lfs diff=lfs merge=lfs -text
186
+ onnx/up_blocks.0/onnx__MatMul_7351 filter=lfs diff=lfs merge=lfs -text
187
+ onnx/up_blocks.0/onnx__MatMul_7264 filter=lfs diff=lfs merge=lfs -text
188
+ onnx/up_blocks.0/onnx__MatMul_7327 filter=lfs diff=lfs merge=lfs -text
189
+ onnx/up_blocks.0/onnx__MatMul_7280 filter=lfs diff=lfs merge=lfs -text
190
+ onnx/up_blocks.0/onnx__MatMul_7170 filter=lfs diff=lfs merge=lfs -text
191
+ onnx/up_blocks.0/onnx__MatMul_7313 filter=lfs diff=lfs merge=lfs -text
192
+ onnx/up_blocks.0/onnx__MatMul_7255 filter=lfs diff=lfs merge=lfs -text
193
+ onnx/up_blocks.0/onnx__MatMul_7242 filter=lfs diff=lfs merge=lfs -text
194
+ onnx/up_blocks.0/onnx__MatMul_7278 filter=lfs diff=lfs merge=lfs -text
195
+ onnx/up_blocks.0/onnx__MatMul_7291 filter=lfs diff=lfs merge=lfs -text
196
+ onnx/up_blocks.0/onnx__MatMul_7279 filter=lfs diff=lfs merge=lfs -text
197
+ onnx/up_blocks.0/onnx__MatMul_7288 filter=lfs diff=lfs merge=lfs -text
198
+ onnx/up_blocks.0/onnx__MatMul_7304 filter=lfs diff=lfs merge=lfs -text
199
+ onnx/up_blocks.0/onnx__MatMul_7352 filter=lfs diff=lfs merge=lfs -text
200
+ onnx/up_blocks.0/onnx__MatMul_7218 filter=lfs diff=lfs merge=lfs -text
201
+ onnx/up_blocks.0/onnx__MatMul_7499 filter=lfs diff=lfs merge=lfs -text
202
+ engine/down_blocks.0.plan filter=lfs diff=lfs merge=lfs -text
203
+ onnx/up_blocks.0/onnx__MatMul_7432 filter=lfs diff=lfs merge=lfs -text
204
+ onnx/up_blocks.0/onnx__MatMul_7503 filter=lfs diff=lfs merge=lfs -text
205
+ onnx/up_blocks.0/onnx__MatMul_7480 filter=lfs diff=lfs merge=lfs -text
206
+ onnx/up_blocks.0/onnx__MatMul_7488 filter=lfs diff=lfs merge=lfs -text
207
+ onnx/up_blocks.0/onnx__MatMul_7464 filter=lfs diff=lfs merge=lfs -text
208
+ onnx/up_blocks.0/onnx__MatMul_7475 filter=lfs diff=lfs merge=lfs -text
209
+ onnx/up_blocks.0/onnx__MatMul_7502 filter=lfs diff=lfs merge=lfs -text
210
+ onnx/up_blocks.0/onnx__MatMul_7513 filter=lfs diff=lfs merge=lfs -text
211
+ onnx/up_blocks.0/onnx__MatMul_7527 filter=lfs diff=lfs merge=lfs -text
212
+ onnx/up_blocks.0/onnx__MatMul_7537 filter=lfs diff=lfs merge=lfs -text
213
+ onnx/up_blocks.0/onnx__MatMul_7275 filter=lfs diff=lfs merge=lfs -text
214
+ onnx/up_blocks.0/onnx__MatMul_7430 filter=lfs diff=lfs merge=lfs -text
215
+ onnx/up_blocks.0/onnx__MatMul_7267 filter=lfs diff=lfs merge=lfs -text
216
+ onnx/up_blocks.0/onnx__MatMul_7465 filter=lfs diff=lfs merge=lfs -text
217
+ onnx/up_blocks.0/onnx__MatMul_7405 filter=lfs diff=lfs merge=lfs -text
218
+ onnx/up_blocks.0/onnx__MatMul_7512 filter=lfs diff=lfs merge=lfs -text
219
+ onnx/up_blocks.0/onnx__MatMul_7339 filter=lfs diff=lfs merge=lfs -text
220
+ onnx/up_blocks.0/onnx__MatMul_7427 filter=lfs diff=lfs merge=lfs -text
221
+ onnx/up_blocks.0/onnx__MatMul_7194 filter=lfs diff=lfs merge=lfs -text
222
+ onnx/up_blocks.0/onnx__MatMul_7243 filter=lfs diff=lfs merge=lfs -text
223
+ onnx/up_blocks.0/onnx__MatMul_7621 filter=lfs diff=lfs merge=lfs -text
224
+ onnx/up_blocks.0/onnx__MatMul_7266 filter=lfs diff=lfs merge=lfs -text
225
+ onnx/up_blocks.0/onnx__MatMul_7456 filter=lfs diff=lfs merge=lfs -text
226
+ onnx/up_blocks.0/onnx__MatMul_7395 filter=lfs diff=lfs merge=lfs -text
227
+ onnx/up_blocks.0/onnx__MatMul_7077 filter=lfs diff=lfs merge=lfs -text
228
+ onnx/up_blocks.0/onnx__MatMul_7417 filter=lfs diff=lfs merge=lfs -text
229
+ onnx/up_blocks.0/onnx__MatMul_7585 filter=lfs diff=lfs merge=lfs -text
230
+ onnx/up_blocks.0/onnx__MatMul_7314 filter=lfs diff=lfs merge=lfs -text
231
+ onnx/up_blocks.0/onnx__MatMul_7623 filter=lfs diff=lfs merge=lfs -text
232
+ onnx/up_blocks.0/onnx__MatMul_7443 filter=lfs diff=lfs merge=lfs -text
233
+ onnx/up_blocks.0/onnx__MatMul_7419 filter=lfs diff=lfs merge=lfs -text
234
+ onnx/up_blocks.0/onnx__MatMul_7643 filter=lfs diff=lfs merge=lfs -text
235
+ onnx/up_blocks.0/onnx__MatMul_7489 filter=lfs diff=lfs merge=lfs -text
236
+ onnx/up_blocks.0/onnx__MatMul_7455 filter=lfs diff=lfs merge=lfs -text
237
+ onnx/up_blocks.0/onnx__MatMul_7479 filter=lfs diff=lfs merge=lfs -text
238
+ onnx/up_blocks.0/onnx__MatMul_7549 filter=lfs diff=lfs merge=lfs -text
239
+ onnx/up_blocks.0/onnx__MatMul_6957 filter=lfs diff=lfs merge=lfs -text
240
+ onnx/up_blocks.0/onnx__MatMul_6933 filter=lfs diff=lfs merge=lfs -text
241
+ onnx/up_blocks.0/onnx__MatMul_7504 filter=lfs diff=lfs merge=lfs -text
242
+ onnx/up_blocks.0/onnx__MatMul_7431 filter=lfs diff=lfs merge=lfs -text
243
+ onnx/up_blocks.0/onnx__MatMul_7441 filter=lfs diff=lfs merge=lfs -text
244
+ onnx/up_blocks.0/onnx__MatMul_7440 filter=lfs diff=lfs merge=lfs -text
245
+ onnx/up_blocks.0/onnx__MatMul_7408 filter=lfs diff=lfs merge=lfs -text
246
+ onnx/up_blocks.0/onnx__MatMul_7466 filter=lfs diff=lfs merge=lfs -text
247
+ onnx/up_blocks.0/onnx__MatMul_7416 filter=lfs diff=lfs merge=lfs -text
248
+ onnx/up_blocks.0/onnx__MatMul_7029 filter=lfs diff=lfs merge=lfs -text
249
+ onnx/up_blocks.0/onnx__MatMul_7406 filter=lfs diff=lfs merge=lfs -text
250
+ onnx/up_blocks.0/onnx__MatMul_7552 filter=lfs diff=lfs merge=lfs -text
251
+ onnx/up_blocks.0/onnx__MatMul_7451 filter=lfs diff=lfs merge=lfs -text
252
+ onnx/up_blocks.0/onnx__MatMul_7053 filter=lfs diff=lfs merge=lfs -text
253
+ onnx/up_blocks.0/onnx__MatMul_7005 filter=lfs diff=lfs merge=lfs -text
254
+ onnx/up_blocks.0/onnx__MatMul_7646 filter=lfs diff=lfs merge=lfs -text
255
+ onnx/up_blocks.0/onnx__MatMul_7560 filter=lfs diff=lfs merge=lfs -text
256
+ onnx/up_blocks.0/onnx__MatMul_7338 filter=lfs diff=lfs merge=lfs -text
257
+ onnx/up_blocks.0/onnx__MatMul_7595 filter=lfs diff=lfs merge=lfs -text
258
+ onnx/up_blocks.0/onnx__MatMul_7362 filter=lfs diff=lfs merge=lfs -text
259
+ onnx/up_blocks.0/onnx__MatMul_7574 filter=lfs diff=lfs merge=lfs -text
260
+ onnx/up_blocks.0/onnx__MatMul_7454 filter=lfs diff=lfs merge=lfs -text
261
+ onnx/up_blocks.0/onnx__MatMul_7645 filter=lfs diff=lfs merge=lfs -text
262
+ onnx/up_blocks.0/onnx__MatMul_7622 filter=lfs diff=lfs merge=lfs -text
263
+ onnx/up_blocks.0/onnx__MatMul_7478 filter=lfs diff=lfs merge=lfs -text
264
+ onnx/up_blocks.0/onnx__MatMul_7598 filter=lfs diff=lfs merge=lfs -text
265
+ onnx/up_blocks.0/onnx__MatMul_7608 filter=lfs diff=lfs merge=lfs -text
266
+ onnx/up_blocks.0/onnx__MatMul_7575 filter=lfs diff=lfs merge=lfs -text
267
+ onnx/up_blocks.0/onnx__MatMul_7561 filter=lfs diff=lfs merge=lfs -text
268
+ onnx/up_blocks.0/onnx__MatMul_7563 filter=lfs diff=lfs merge=lfs -text
269
+ onnx/up_blocks.0/onnx__MatMul_7375 filter=lfs diff=lfs merge=lfs -text
270
+ onnx/up_blocks.0/onnx__MatMul_7385 filter=lfs diff=lfs merge=lfs -text
271
+ onnx/up_blocks.0/onnx__MatMul_7584 filter=lfs diff=lfs merge=lfs -text
272
+ onnx/up_blocks.0/onnx__MatMul_7384 filter=lfs diff=lfs merge=lfs -text
273
+ onnx/up_blocks.0/onnx__MatMul_7467 filter=lfs diff=lfs merge=lfs -text
274
+ onnx/up_blocks.0/onnx__MatMul_7599 filter=lfs diff=lfs merge=lfs -text
275
+ onnx/up_blocks.0/onnx__MatMul_7551 filter=lfs diff=lfs merge=lfs -text
276
+ onnx/up_blocks.0/onnx__MatMul_7571 filter=lfs diff=lfs merge=lfs -text
277
+ onnx/up_blocks.0/onnx__MatMul_7125 filter=lfs diff=lfs merge=lfs -text
278
+ onnx/up_blocks.0/onnx__MatMul_7442 filter=lfs diff=lfs merge=lfs -text
279
+ onnx/up_blocks.0/onnx__MatMul_7528 filter=lfs diff=lfs merge=lfs -text
280
+ onnx/up_blocks.0/onnx__MatMul_7576 filter=lfs diff=lfs merge=lfs -text
281
+ onnx/up_blocks.0/onnx__MatMul_7398 filter=lfs diff=lfs merge=lfs -text
282
+ onnx/up_blocks.0/onnx__MatMul_7418 filter=lfs diff=lfs merge=lfs -text
283
+ onnx/up_blocks.0/onnx__MatMul_7562 filter=lfs diff=lfs merge=lfs -text
284
+ onnx/up_blocks.0/onnx__MatMul_7374 filter=lfs diff=lfs merge=lfs -text
285
+ onnx/up_blocks.0/onnx__MatMul_7610 filter=lfs diff=lfs merge=lfs -text
286
+ onnx/up_blocks.0/onnx__MatMul_7101 filter=lfs diff=lfs merge=lfs -text
287
+ onnx/up_blocks.0/onnx__MatMul_7523 filter=lfs diff=lfs merge=lfs -text
288
+ onnx/up_blocks.0/onnx__MatMul_7526 filter=lfs diff=lfs merge=lfs -text
289
+ onnx/up_blocks.0/onnx__MatMul_7536 filter=lfs diff=lfs merge=lfs -text
290
+ onnx/up_blocks.0/onnx__MatMul_7586 filter=lfs diff=lfs merge=lfs -text
291
+ onnx/up_blocks.0/onnx__MatMul_6981 filter=lfs diff=lfs merge=lfs -text
292
+ onnx/up_blocks.0/onnx__MatMul_7387 filter=lfs diff=lfs merge=lfs -text
293
+ onnx/up_blocks.0/onnx__MatMul_7277 filter=lfs diff=lfs merge=lfs -text
294
+ onnx/up_blocks.0/onnx__MatMul_7547 filter=lfs diff=lfs merge=lfs -text
295
+ onnx/up_blocks.0/onnx__MatMul_7539 filter=lfs diff=lfs merge=lfs -text
296
+ onnx/up_blocks.0/onnx__MatMul_7550 filter=lfs diff=lfs merge=lfs -text
297
+ onnx/up_blocks.0/onnx__MatMul_7515 filter=lfs diff=lfs merge=lfs -text
298
+ onnx/up_blocks.0/onnx__MatMul_7491 filter=lfs diff=lfs merge=lfs -text
299
+ onnx/up_blocks.0/onnx__MatMul_7514 filter=lfs diff=lfs merge=lfs -text
300
+ onnx/up_blocks.0/onnx__MatMul_7538 filter=lfs diff=lfs merge=lfs -text
301
+ onnx/up_blocks.0/onnx__MatMul_7149 filter=lfs diff=lfs merge=lfs -text
302
+ onnx/up_blocks.0/onnx__MatMul_7490 filter=lfs diff=lfs merge=lfs -text
303
+ onnx/up_blocks.0/onnx__MatMul_7229 filter=lfs diff=lfs merge=lfs -text
304
+ onnx/up_blocks.0/onnx__MatMul_7573 filter=lfs diff=lfs merge=lfs -text
305
+ onnx/up_blocks.0/onnx__MatMul_7597 filter=lfs diff=lfs merge=lfs -text
306
+ onnx/up_blocks.0/onnx__MatMul_7181 filter=lfs diff=lfs merge=lfs -text
307
+ onnx/up_blocks.0/onnx__MatMul_7373 filter=lfs diff=lfs merge=lfs -text
308
+ onnx/up_blocks.0/onnx__MatMul_7253 filter=lfs diff=lfs merge=lfs -text
309
+ onnx/up_blocks.0/onnx__MatMul_7349 filter=lfs diff=lfs merge=lfs -text
310
+ onnx/up_blocks.0/onnx__MatMul_7301 filter=lfs diff=lfs merge=lfs -text
311
+ onnx/up_blocks.0/onnx__MatMul_7325 filter=lfs diff=lfs merge=lfs -text
312
+ onnx/up_blocks.0/onnx__MatMul_7205 filter=lfs diff=lfs merge=lfs -text
313
+ onnx/up_blocks.0/onnx__MatMul_7501 filter=lfs diff=lfs merge=lfs -text
314
+ onnx/up_blocks.0/onnx__MatMul_7429 filter=lfs diff=lfs merge=lfs -text
315
+ onnx/up_blocks.0/onnx__MatMul_7397 filter=lfs diff=lfs merge=lfs -text
316
+ onnx/up_blocks.0/onnx__MatMul_7453 filter=lfs diff=lfs merge=lfs -text
317
+ onnx/up_blocks.0/onnx__MatMul_6932 filter=lfs diff=lfs merge=lfs -text
318
+ onnx/up_blocks.0/onnx__MatMul_7348 filter=lfs diff=lfs merge=lfs -text
319
+ engine/up_blocks.2.plan filter=lfs diff=lfs merge=lfs -text
320
+ onnx/up_blocks.0/onnx__MatMul_7525 filter=lfs diff=lfs merge=lfs -text
321
+ onnx/up_blocks.0/onnx__MatMul_7477 filter=lfs diff=lfs merge=lfs -text
322
+ onnx/up_blocks.0/onnx__MatMul_7500 filter=lfs diff=lfs merge=lfs -text
323
+ onnx/up_blocks.0/onnx__MatMul_7644 filter=lfs diff=lfs merge=lfs -text
324
+ onnx/up_blocks.0/onnx__MatMul_6980 filter=lfs diff=lfs merge=lfs -text
325
+ onnx/up_blocks.0/onnx__MatMul_7428 filter=lfs diff=lfs merge=lfs -text
326
+ onnx/up_blocks.0/onnx__MatMul_6956 filter=lfs diff=lfs merge=lfs -text
327
+ onnx/up_blocks.0/onnx__MatMul_7572 filter=lfs diff=lfs merge=lfs -text
328
+ onnx/up_blocks.0/onnx__MatMul_7620 filter=lfs diff=lfs merge=lfs -text
329
+ onnx/up_blocks.0/resnets.0.conv2.weight filter=lfs diff=lfs merge=lfs -text
330
+ onnx/up_blocks.0/onnx__MatMul_7004 filter=lfs diff=lfs merge=lfs -text
331
+ onnx/up_blocks.0/resnets.2.conv2.weight filter=lfs diff=lfs merge=lfs -text
332
+ onnx/up_blocks.0/onnx__MatMul_7052 filter=lfs diff=lfs merge=lfs -text
333
+ onnx/up_blocks.0/onnx__MatMul_7148 filter=lfs diff=lfs merge=lfs -text
334
+ onnx/up_blocks.0/onnx__MatMul_7252 filter=lfs diff=lfs merge=lfs -text
335
+ onnx/up_blocks.0/onnx__MatMul_7100 filter=lfs diff=lfs merge=lfs -text
336
+ onnx/up_blocks.0/onnx__MatMul_7300 filter=lfs diff=lfs merge=lfs -text
337
+ onnx/up_blocks.0/onnx__MatMul_7452 filter=lfs diff=lfs merge=lfs -text
338
+ onnx/up_blocks.0/upsamplers.0.conv.weight filter=lfs diff=lfs merge=lfs -text
339
+ onnx/up_blocks.0/onnx__MatMul_7524 filter=lfs diff=lfs merge=lfs -text
340
+ onnx/up_blocks.0/onnx__MatMul_7476 filter=lfs diff=lfs merge=lfs -text
341
+ onnx/up_blocks.0/onnx__MatMul_7028 filter=lfs diff=lfs merge=lfs -text
342
+ onnx/up_blocks.0/resnets.1.conv2.weight filter=lfs diff=lfs merge=lfs -text
343
+ onnx/up_blocks.0/onnx__MatMul_7124 filter=lfs diff=lfs merge=lfs -text
344
+ onnx/up_blocks.0/onnx__MatMul_7396 filter=lfs diff=lfs merge=lfs -text
345
+ onnx/up_blocks.0/onnx__MatMul_7076 filter=lfs diff=lfs merge=lfs -text
346
+ onnx/up_blocks.0/onnx__MatMul_7372 filter=lfs diff=lfs merge=lfs -text
347
+ onnx/up_blocks.0/onnx__MatMul_7180 filter=lfs diff=lfs merge=lfs -text
348
+ onnx/up_blocks.0/onnx__MatMul_7228 filter=lfs diff=lfs merge=lfs -text
349
+ onnx/up_blocks.0/onnx__MatMul_7204 filter=lfs diff=lfs merge=lfs -text
350
+ onnx/up_blocks.0/onnx__MatMul_7548 filter=lfs diff=lfs merge=lfs -text
351
+ onnx/up_blocks.0/resnets.2.conv1.weight filter=lfs diff=lfs merge=lfs -text
352
+ onnx/up_blocks.0/onnx__MatMul_7324 filter=lfs diff=lfs merge=lfs -text
353
+ onnx/up_blocks.0/onnx__MatMul_7276 filter=lfs diff=lfs merge=lfs -text
354
+ onnx/up_blocks.0/onnx__MatMul_7596 filter=lfs diff=lfs merge=lfs -text
355
+ onnx/up_blocks.0/resnets.1.conv1.weight filter=lfs diff=lfs merge=lfs -text
356
+ onnx/up_blocks.0/resnets.0.conv1.weight filter=lfs diff=lfs merge=lfs -text
357
+ engine/down_blocks.1.plan filter=lfs diff=lfs merge=lfs -text
358
+ engine/up_blocks.1.plan filter=lfs diff=lfs merge=lfs -text
359
+ engine/mid_block.plan filter=lfs diff=lfs merge=lfs -text
360
+ engine/down_blocks.2.plan filter=lfs diff=lfs merge=lfs -text
361
+ engine/up_blocks.0.plan filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ **/__pycache__
2
+ **.egg-info
.gitmodules CHANGED
@@ -1,4 +1,3 @@
1
- [submodule "newdream-sdxl-20"]
2
  path = models/newdream-sdxl-20
3
  url = https://huggingface.co/stablediffusionapi/newdream-sdxl-20
4
- branch = main
 
1
+ [submodule "models/newdream-sdxl-20"]
2
  path = models/newdream-sdxl-20
3
  url = https://huggingface.co/stablediffusionapi/newdream-sdxl-20
 
engine/down_blocks.0.plan ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c4b70d1a0416aa494a3d7759349f9637c36c4e26729c3190f5a16bfc497694b
3
+ size 11712396
engine/down_blocks.1.plan ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21baa4ea0b4740f3cec8c35825dd6cc4dca7dc39cda3f6be8eab2ce1cd3e834f
3
+ size 124421828
engine/down_blocks.2.plan ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4412bf1d85e21eaac612aa9319b33f35b7e83d7858cca597bb6e645fa7bc7207
3
+ size 1522617884
engine/mid_block.plan ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:040b2e3dddac9efd68e125b02056a5cd4f8df7259e7167f14e57db8b68e26fb8
3
+ size 830401652
engine/up_blocks.0.plan ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cbc51a0d7f00e4d8b108bfda46a25df62b41623e084733173730774e7e15048
3
+ size 2425023084
engine/up_blocks.1.plan ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bd8a45dd815e72cdf75dd5df10a651c892b09f80a292edaafd4d192be419be3
3
+ size 218672972
engine/up_blocks.2.plan ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee5d0d133198b30624c3d682b0438fa4485eeade489e7ab21ed425e1d947549
3
+ size 24347780
loss_params.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27ba04dc09bfe8325c2b8d8acbfa5fbf746f61169cf1cdfe07d028ad697217f1
3
- size 3568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e4c687fb455b7495e325d5f1761391d281323de6d2a493b153a3dac9536664e
3
+ size 3120
pyproject.toml CHANGED
@@ -8,13 +8,20 @@ description = "An edge-maxxing model submission for the 4090 newdream contest"
8
  requires-python = ">=3.10,<3.11"
9
  version = "6"
10
  dependencies = [
 
11
  "diffusers==0.30.2",
12
  "transformers==4.41.2",
13
  "accelerate==0.31.0",
14
  "omegaconf==2.3.0",
15
  "torch==2.4.1",
16
  "edge-maxxing-pipelines @ git+https://github.com/womboai/edge-maxxing@8d8ff45863416484b5b4bc547782591bbdfc696a#subdirectory=pipelines",
17
- "DeepCache",
 
 
 
 
 
 
18
  ]
19
 
20
  [project.scripts]
 
8
  requires-python = ">=3.10,<3.11"
9
  version = "6"
10
  dependencies = [
11
+ "wheel",
12
  "diffusers==0.30.2",
13
  "transformers==4.41.2",
14
  "accelerate==0.31.0",
15
  "omegaconf==2.3.0",
16
  "torch==2.4.1",
17
  "edge-maxxing-pipelines @ git+https://github.com/womboai/edge-maxxing@8d8ff45863416484b5b4bc547782591bbdfc696a#subdirectory=pipelines",
18
+ "polygraphy",
19
+ "onnx",
20
+ "tensorrt>=10.5.0",
21
+ "tensorrt-cu12-libs>=10.5.0",
22
+ "tensorrt-cu12-bindings>=10.5.0",
23
+ "cuda-python>=12.6.0",
24
+ "setuptools>=75.2.0",
25
  ]
26
 
27
  [project.scripts]
src/cache_diffusion/cachify.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a
5
+ # copy of this software and associated documentation files (the "Software"),
6
+ # to deal in the Software without restriction, including without limitation
7
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ # and/or sell copies of the Software, and to permit persons to whom the
9
+ # Software is furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
+ # DEALINGS IN THE SOFTWARE.
21
+
22
+ import fnmatch
23
+ from contextlib import contextmanager
24
+
25
+ from diffusers.models.attention import BasicTransformerBlock, JointTransformerBlock
26
+ from diffusers.models.transformers.pixart_transformer_2d import PixArtTransformer2DModel
27
+ from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
28
+ from diffusers.models.unets.unet_2d_blocks import (
29
+ CrossAttnDownBlock2D,
30
+ CrossAttnUpBlock2D,
31
+ DownBlock2D,
32
+ UNetMidBlock2DCrossAttn,
33
+ UpBlock2D,
34
+ )
35
+ from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
36
+ from diffusers.models.unets.unet_3d_blocks import (
37
+ CrossAttnDownBlockSpatioTemporal,
38
+ CrossAttnUpBlockSpatioTemporal,
39
+ DownBlockSpatioTemporal,
40
+ UNetMidBlockSpatioTemporal,
41
+ UpBlockSpatioTemporal,
42
+ )
43
+ from diffusers.models.unets.unet_spatio_temporal_condition import UNetSpatioTemporalConditionModel
44
+
45
+ from .module import CachedModule
46
+ from .utils import replace_module
47
+
48
+ CACHED_PIPE = {
49
+ UNet2DConditionModel: (
50
+ DownBlock2D,
51
+ CrossAttnDownBlock2D,
52
+ UNetMidBlock2DCrossAttn,
53
+ CrossAttnUpBlock2D,
54
+ UpBlock2D,
55
+ ),
56
+ PixArtTransformer2DModel: (BasicTransformerBlock),
57
+ UNetSpatioTemporalConditionModel: (
58
+ CrossAttnDownBlockSpatioTemporal,
59
+ DownBlockSpatioTemporal,
60
+ UpBlockSpatioTemporal,
61
+ CrossAttnUpBlockSpatioTemporal,
62
+ UNetMidBlockSpatioTemporal,
63
+ ),
64
+ SD3Transformer2DModel: (JointTransformerBlock),
65
+ }
66
+
67
+
68
+ def _apply_to_modules(model, action, modules=None, config_list=None):
69
+ if hasattr(model, "use_trt_infer") and model.use_trt_infer:
70
+ for key, module in model.engines.items():
71
+ if isinstance(module, CachedModule):
72
+ action(module)
73
+ elif config_list:
74
+ for config in config_list:
75
+ if _pass(key, config["wildcard_or_filter_func"]):
76
+ model.engines[key] = CachedModule(module, config["select_cache_step_func"])
77
+ else:
78
+ for name, module in model.named_modules():
79
+ if isinstance(module, CachedModule):
80
+ action(module)
81
+ elif modules and config_list:
82
+ for config in config_list:
83
+ if _pass(name, config["wildcard_or_filter_func"]) and isinstance(
84
+ module, modules
85
+ ):
86
+ replace_module(
87
+ model,
88
+ name,
89
+ CachedModule(module, config["select_cache_step_func"]),
90
+ )
91
+
92
+
93
+ def cachify(model, config_list, modules):
94
+ def cache_action(module):
95
+ pass # No action needed, caching is handled in the loop itself
96
+
97
+ _apply_to_modules(model, cache_action, modules, config_list)
98
+
99
+
100
+ def disable(pipe):
101
+ model = get_model(pipe)
102
+ _apply_to_modules(model, lambda module: module.disable_cache())
103
+
104
+
105
+ def enable(pipe):
106
+ model = get_model(pipe)
107
+ _apply_to_modules(model, lambda module: module.enable_cache())
108
+
109
+
110
+ def reset_status(pipe):
111
+ model = get_model(pipe)
112
+ _apply_to_modules(model, lambda module: setattr(module, "cur_step", 0))
113
+
114
+
115
+ def _pass(name, wildcard_or_filter_func):
116
+ if isinstance(wildcard_or_filter_func, str):
117
+ return fnmatch.fnmatch(name, wildcard_or_filter_func)
118
+ elif callable(wildcard_or_filter_func):
119
+ return wildcard_or_filter_func(name)
120
+ else:
121
+ raise NotImplementedError(f"Unsupported type {type(wildcard_or_filter_func)}")
122
+
123
+
124
+ def get_model(pipe):
125
+ if hasattr(pipe, "unet"):
126
+ return pipe.unet
127
+ elif hasattr(pipe, "transformer"):
128
+ return pipe.transformer
129
+ else:
130
+ raise KeyError
131
+
132
+
133
+ @contextmanager
134
+ def infer(pipe):
135
+ try:
136
+ yield pipe
137
+ finally:
138
+ reset_status(pipe)
139
+
140
+
141
+ def prepare(pipe, config_list):
142
+ model = get_model(pipe)
143
+ assert model.__class__ in CACHED_PIPE.keys(), f"{model.__class__} is not supported!"
144
+ cachify(model, config_list, CACHED_PIPE[model.__class__])
src/cache_diffusion/module.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a
5
+ # copy of this software and associated documentation files (the "Software"),
6
+ # to deal in the Software without restriction, including without limitation
7
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ # and/or sell copies of the Software, and to permit persons to whom the
9
+ # Software is furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
+ # DEALINGS IN THE SOFTWARE.
21
+
22
+ from torch import nn
23
+
24
+
25
class CachedModule(nn.Module):
    """Wraps a sub-module so its output can be reused across denoising steps.

    On steps where ``select_cache_step_func(cur_step)`` is True (and caching
    is enabled) the wrapped block is skipped and the last computed output is
    returned instead.
    """

    def __init__(self, block, select_cache_step_func) -> None:
        super().__init__()
        self.block = block
        self.select_cache_step_func = select_cache_step_func
        # Index of the current denoising step; rewound externally between runs.
        self.cur_step = 0
        # Last real output of ``block``; None until the first computation.
        self.cached_results = None
        self.enabled = True

    def __getattr__(self, name):
        # Fall through to the wrapped block so the wrapper stays transparent
        # to code that reads attributes of the original module.
        try:
            return super().__getattr__(name)
        except AttributeError:
            return getattr(self.block, name)

    def if_cache(self):
        """True when the cached output should be reused for this step."""
        return self.select_cache_step_func(self.cur_step) and self.enabled

    def enable_cache(self):
        self.enabled = True

    def disable_cache(self):
        self.enabled = False
        self.cur_step = 0

    def forward(self, *args, **kwargs):
        # Recompute when the schedule says so, or when nothing has been
        # cached yet. The second condition fixes a latent bug: a schedule
        # that selects caching at step 0 previously made forward() return
        # None because cached_results had never been populated.
        if not self.if_cache() or self.cached_results is None:
            self.cached_results = self.block(*args, **kwargs)
        if self.enabled:
            self.cur_step += 1
        return self.cached_results
src/cache_diffusion/utils.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a
5
+ # copy of this software and associated documentation files (the "Software"),
6
+ # to deal in the Software without restriction, including without limitation
7
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ # and/or sell copies of the Software, and to permit persons to whom the
9
+ # Software is furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
+ # DEALINGS IN THE SOFTWARE.
21
+
22
+ import re
23
+
24
# Default cache configs, one list per supported backbone. Each entry pairs:
#   wildcard_or_filter_func — module-name predicate selecting which blocks
#                             may be wrapped in CachedModule;
#   select_cache_step_func  — per-step predicate: True means reuse the
#                             cached output instead of recomputing.

# SDXL: cache everything except up_blocks.2; reuse on odd steps.
SDXL_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: "up_blocks.2" not in name,
        "select_cache_step_func": lambda step: (step % 2) != 0,
    }
]

# PixArt: skip transformer blocks 21-27; recompute every third step.
PIXART_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: not re.search(
            r"transformer_blocks\.(2[1-7])\.", name
        ),
        "select_cache_step_func": lambda step: (step % 3) != 0,
    }
]

# SVD: cache everything except up_blocks.3; reuse on odd steps.
SVD_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: "up_blocks.3" not in name,
        "select_cache_step_func": lambda step: (step % 2) != 0,
    }
]

# SD3: negative lookahead — match any name NOT mentioning transformer blocks
# 16-23; reuse on odd steps. NOTE: re.search returns a Match/None rather than
# a bool, which is fine for the truthiness checks done by the caller.
SD3_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: re.search(
            r"^((?!transformer_blocks\.(1[6-9]|2[0-3])).)*$", name
        ),
        "select_cache_step_func": lambda step: (step % 2) != 0,
    }
]
55
+
56
+
57
def replace_module(parent, name_path, new_module):
    """Replace the sub-module addressed by dotted ``name_path`` under ``parent``.

    Walks every path segment except the last to reach the direct owner, then
    rebinds the final attribute to ``new_module``.
    """
    *owners, leaf = name_path.split(".")
    for part in owners:
        parent = getattr(parent, part)
    setattr(parent, leaf, new_module)
src/pipeline.py CHANGED
@@ -1,99 +1,57 @@
1
  import torch
 
2
  from PIL.Image import Image
3
- from diffusers import StableDiffusionXLPipeline, AutoPipelineForImage2Image
4
  from pipelines.models import TextToImageRequest
5
  from torch import Generator
6
- from DeepCache import DeepCacheSDHelper
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- '''
9
- def callback_dynamic_cfg(pipeline, step_index, timestep, callback_kwargs):
10
- if step_index == int(pipeline.num_timesteps * 0.5):
11
- callback_kwargs['prompt_embeds'] = callback_kwargs['prompt_embeds'].chunk(2)[-1]
12
- callback_kwargs['add_text_embeds'] = callback_kwargs['add_text_embeds'].chunk(2)[-1]
13
- callback_kwargs['add_time_ids'] = callback_kwargs['add_time_ids'].chunk(2)[-1]
14
- pipeline._guidance_scale = 0.0
15
-
16
- return callback_kwargs
17
- '''
18
-
19
- # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20
-
21
- def load_pipeline() -> dict: #StableDiffusionXLPipeline, AutoPipelineForImage2Image:
22
-
23
- pipeline_dict = {}
24
- pipeline = StableDiffusionXLPipeline.from_pretrained(
25
- "./models/newdream-sdxl-20",
26
- torch_dtype=torch.float16,
27
- #local_files_only=True,
28
- use_safetensors=True,
29
- variant='fp16',
30
- ).to("cuda")
31
-
32
- refiner = AutoPipelineForImage2Image.from_pretrained(
33
- 'stabilityai/stable-diffusion-xl-refiner-1.0',
34
- use_safetensors=True,
35
- torch_dtype=torch.float16,
36
- variant='fp16',
37
- ).to('cuda')
38
-
39
-
40
- helper = DeepCacheSDHelper(pipe=pipeline)
41
- helper.set_params(cache_interval=3, cache_branch_id=0)
42
- helper.enable()
43
-
44
- refiner_helper = DeepCacheSDHelper(pipe=refiner)
45
- refiner_helper.set_params(cache_interval=3, cache_branch_id=0)
46
- refiner_helper.enable()
47
-
48
-
49
- for _ in range(5):
50
- pipeline(prompt="")
51
-
52
- pipeline_dict = {
53
- 'base_pipeline': pipeline,
54
- 'refiner': refiner
55
- }
56
- return pipeline_dict #base_pipeline, refiner
57
-
58
-
59
- def infer(request: TextToImageRequest, pipeline_dict: dict) -> Image: #pipeline: StableDiffusionXLPipeline, refiner: AutoPipelineForImage2Image) -> Image:
60
  if request.seed is None:
61
  generator = None
62
  else:
63
- generator = Generator(pipeline_dict['base_pipeline'].device).manual_seed(request.seed)
64
-
65
-
66
- image = pipeline_dict['base_pipeline'](
67
- prompt=request.prompt,
68
- negative_prompt=request.negative_prompt,
69
- width=request.width,
70
- height=request.height,
71
- generator=generator,
72
- num_inference_steps=27,
73
- denoising_end=0.8,
74
- output_type='latent',
75
- ).images
76
-
77
- return pipeline_dict['refiner'](
78
- prompt=request.prompt,
79
- negative_prompt=request.negative_prompt,
80
- width=request.width,
81
- height=request.height,
82
- generator=generator,
83
- num_inference_steps=27,
84
- denoising_start=0.8,
85
- image=image,
86
- ).images[0]
87
-
88
- '''
89
- return pipeline(
90
- prompt=request.prompt,
91
- negative_prompt=request.negative_prompt,
92
- width=request.width,
93
- height=request.height,
94
- generator=generator,
95
- num_inference_steps=27,
96
- #callback_on_step_end=callback_dynamic_cfg,
97
- #callback_on_step_end_tensor_inputs=['prompt_embeds', 'add_text_embeds', 'add_time_ids'],
98
- ).images[0]
99
- '''
 
1
  import torch
2
+ from pathlib import Path
3
  from PIL.Image import Image
4
+ from diffusers import StableDiffusionXLPipeline, DDIMScheduler
5
  from pipelines.models import TextToImageRequest
6
  from torch import Generator
7
+ from cache_diffusion import cachify
8
+ from trt_pipeline.deploy import load_unet_trt
9
+ from loss import SchedulerWrapper
10
+ import numpy as np
11
+
12
+
13
# Module-level RNG pinned to a fixed seed, used only by the warm-up passes in
# load_pipeline(); infer() builds its own per-request generator that shadows
# this one. NOTE(review): constructing this at import time requires a CUDA
# device to be present — confirm that is always true where this is imported.
generator = Generator(torch.device("cuda")).manual_seed(69)
14
+
15
# Cache every block except the three listed; reuse cached outputs only on
# odd steps at or beyond step 10.
SDXL_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: all(
            blocked not in name
            for blocked in ("down_blocks.2", "down_blocks.3", "up_blocks.2")
        ),
        "select_cache_step_func": lambda step: step >= 10 and step % 2 != 0,
    }
]
20
def load_pipeline() -> StableDiffusionXLPipeline:
    """Build the SDXL pipeline with TensorRT UNet blocks and block caching.

    Loads local fp16 weights, attaches pre-built TRT engines to the UNet,
    installs cache-diffusion hooks, wraps the scheduler, runs four warm-up
    generations, then returns the pipeline with caching disabled
    (infer() re-enables it per request).
    """
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "models/newdream-sdxl-20", torch_dtype=torch.float16, use_safetensors=True, local_files_only=True
    ).to("cuda")
    # Swap UNet block forwards for TRT engine calls (engines prebuilt under
    # ./engine, batch size 1).
    load_unet_trt(
        pipe.unet,
        engine_path=Path("./engine"),
        batch_size=1,
    )
    cachify.prepare(pipe, SDXL_DEFAULT_CONFIG)
    cachify.enable(pipe)
    pipe.scheduler = SchedulerWrapper(DDIMScheduler.from_config(pipe.scheduler.config))
    # Warm-up: four throwaway generations; the context manager rewinds cache
    # step counters on exit. NOTE(review): the loop calls `pipe`, not
    # `cached_pipe` — same object, so behavior is unchanged.
    with cachify.infer(pipe) as cached_pipe:
        for _ in range(4):
            pipe(prompt="a photo of table", num_inference_steps=14)
    cachify.disable(pipe)
    pipe.scheduler.prepare_loss()
    return pipe
38
+
39
def infer(request: TextToImageRequest, pipeline: StableDiffusionXLPipeline) -> Image:
    """Generate one image for ``request`` with the cached/TensorRT pipeline.

    A per-request Generator is seeded when ``request.seed`` is given so
    results are reproducible; otherwise sampling is non-deterministic.
    """
    if request.seed is None:
        generator = None
    else:
        generator = Generator(pipeline.device).manual_seed(request.seed)
    # Re-arm caching for this run; modules already wrapped by load_pipeline()
    # are recognized as CachedModule and left untouched by prepare().
    cachify.prepare(pipeline, SDXL_DEFAULT_CONFIG)
    cachify.enable(pipeline)
    # The context manager rewinds every cached block's step counter on exit
    # so the next request starts from denoising step 0.
    with cachify.infer(pipeline) as cached_pipe:
        image = cached_pipe(
            prompt=request.prompt,
            negative_prompt=request.negative_prompt,
            width=request.width,
            height=request.height,
            generator=generator,
            num_inference_steps=16,
        ).images[0]
    # BUG FIX: the previous code ended with `filtered_image = pixel_filter(image)`,
    # but no `pixel_filter` is defined or imported anywhere in this module, so
    # every request raised NameError. Return the pipeline output directly;
    # reintroduce a post-processing step here once one actually exists.
    return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/trt_pipeline/config.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a
5
+ # copy of this software and associated documentation files (the "Software"),
6
+ # to deal in the Software without restriction, including without limitation
7
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ # and/or sell copies of the Software, and to permit persons to whom the
9
+ # Software is furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
+ # DEALINGS IN THE SOFTWARE.
21
+ from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
22
+ from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
23
+
24
# Shared ONNX export spec reused for SD3 transformer blocks 0-22: each block
# consumes (hidden_states, encoder_hidden_states, temb) and emits both the
# updated encoder and hidden streams. The final block (transformer_blocks.23
# in ONNX_CONFIG below) differs: it only outputs hidden_states_out.
sd3_common_transformer_block_config = {
    "dummy_input": {
        # name -> example input shape used to trace the export.
        "hidden_states": (2, 4096, 1536),
        "encoder_hidden_states": (2, 333, 1536),
        "temb": (2, 1536),
    },
    "output_names": ["encoder_hidden_states_out", "hidden_states_out"],
    "dynamic_axes": {
        # Axes left symbolic at export time.
        "hidden_states": {0: "batch_size"},
        "encoder_hidden_states": {0: "batch_size"},
        "temb": {0: "steps"},
    },
}
37
+
38
# Per-block ONNX export / TensorRT profile specs, keyed by backbone class.
# For each exported sub-block:
#   dummy_input  — input name -> example shape for tracing / profile sizing
#                  (leading dim 2 presumably reflects classifier-free-guidance
#                  batch doubling — NOTE(review): confirm against the caller);
#   output_names — ordered ONNX graph outputs;
#   dynamic_axes — axes left symbolic at export time.
ONNX_CONFIG = {
    UNet2DConditionModel: {
        "down_blocks.0": {
            "dummy_input": {
                "hidden_states": (2, 320, 128, 128),
                "temb": (2, 1280),
            },
            "output_names": ["sample", "res_samples_0", "res_samples_1", "res_samples_2"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
            },
        },
        "down_blocks.1": {
            "dummy_input": {
                "hidden_states": (2, 320, 64, 64),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample", "res_samples_0", "res_samples_1", "res_samples_2"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
            },
        },
        "down_blocks.2": {
            "dummy_input": {
                "hidden_states": (2, 640, 32, 32),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            # Last down block emits one fewer residual than blocks 0/1.
            "output_names": ["sample", "res_samples_0", "res_samples_1"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
            },
        },
        "mid_block": {
            "dummy_input": {
                "hidden_states": (2, 1280, 32, 32),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
            },
        },
        # Up blocks additionally consume three skip-connection tensors.
        "up_blocks.0": {
            "dummy_input": {
                "hidden_states": (2, 1280, 32, 32),
                "res_hidden_states_0": (2, 640, 32, 32),
                "res_hidden_states_1": (2, 1280, 32, 32),
                "res_hidden_states_2": (2, 1280, 32, 32),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
                "res_hidden_states_0": {0: "batch_size"},
                "res_hidden_states_1": {0: "batch_size"},
                "res_hidden_states_2": {0: "batch_size"},
            },
        },
        "up_blocks.1": {
            "dummy_input": {
                "hidden_states": (2, 1280, 64, 64),
                "res_hidden_states_0": (2, 320, 64, 64),
                "res_hidden_states_1": (2, 640, 64, 64),
                "res_hidden_states_2": (2, 640, 64, 64),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
                "res_hidden_states_0": {0: "batch_size"},
                "res_hidden_states_1": {0: "batch_size"},
                "res_hidden_states_2": {0: "batch_size"},
            },
        },
        # up_blocks.2 has no cross-attention, hence no encoder_hidden_states.
        "up_blocks.2": {
            "dummy_input": {
                "hidden_states": (2, 640, 128, 128),
                "res_hidden_states_0": (2, 320, 128, 128),
                "res_hidden_states_1": (2, 320, 128, 128),
                "res_hidden_states_2": (2, 320, 128, 128),
                "temb": (2, 1280),
            },
            "output_names": ["sample"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "res_hidden_states_0": {0: "batch_size"},
                "res_hidden_states_1": {0: "batch_size"},
                "res_hidden_states_2": {0: "batch_size"},
            },
        },
    },
    SD3Transformer2DModel: {
        # Blocks 0-22 share one spec; block 23 drops the encoder output.
        **{f"transformer_blocks.{i}": sd3_common_transformer_block_config for i in range(23)},
        "transformer_blocks.23": {
            "dummy_input": {
                "hidden_states": (2, 4096, 1536),
                "encoder_hidden_states": (2, 333, 1536),
                "temb": (2, 1536),
            },
            "output_names": ["hidden_states_out"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "encoder_hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
            },
        },
    },
}
src/trt_pipeline/deploy.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a
5
+ # copy of this software and associated documentation files (the "Software"),
6
+ # to deal in the Software without restriction, including without limitation
7
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ # and/or sell copies of the Software, and to permit persons to whom the
9
+ # Software is furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
+ # DEALINGS IN THE SOFTWARE.
21
+
22
+ import types
23
+ from pathlib import Path
24
+
25
+ import tensorrt as trt
26
+ import torch
27
+ from cache_diffusion.cachify import CACHED_PIPE, get_model
28
+ from cuda import cudart
29
+ from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
30
+ from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
31
+ from trt_pipeline.config import ONNX_CONFIG
32
+ from trt_pipeline.models.sd3 import sd3_forward
33
+ from trt_pipeline.models.sdxl import (
34
+ cachecrossattnupblock2d_forward,
35
+ cacheunet_forward,
36
+ cacheupblock2d_forward,
37
+ )
38
+ from polygraphy.backend.trt import (
39
+ CreateConfig,
40
+ Profile,
41
+ engine_from_network,
42
+ network_from_onnx_path,
43
+ save_engine,
44
+ )
45
+ from torch.onnx import export as onnx_export
46
+
47
+ from .utils import Engine
48
+
49
+
50
+ def replace_new_forward(backbone):
51
+ if backbone.__class__ == UNet2DConditionModel:
52
+ backbone.forward = types.MethodType(cacheunet_forward, backbone)
53
+ for upsample_block in backbone.up_blocks:
54
+ if (
55
+ hasattr(upsample_block, "has_cross_attention")
56
+ and upsample_block.has_cross_attention
57
+ ):
58
+ upsample_block.forward = types.MethodType(
59
+ cachecrossattnupblock2d_forward, upsample_block
60
+ )
61
+ else:
62
+ upsample_block.forward = types.MethodType(cacheupblock2d_forward, upsample_block)
63
+ elif backbone.__class__ == SD3Transformer2DModel:
64
+ backbone.forward = types.MethodType(sd3_forward, backbone)
65
+
66
+
67
+ def get_input_info(dummy_dict, info: str = None, batch_size: int = 1):
68
+ return_val = [] if info == "profile_shapes" or info == "input_names" else {}
69
+
70
+ def collect_leaf_keys(d):
71
+ for key, value in d.items():
72
+ if isinstance(value, dict):
73
+ collect_leaf_keys(value)
74
+ else:
75
+ value = (value[0] * batch_size,) + value[1:]
76
+ if info == "profile_shapes":
77
+ return_val.append((key, value)) # type: ignore
78
+ elif info == "profile_shapes_dict":
79
+ return_val[key] = value # type: ignore
80
+ elif info == "dummy_input":
81
+ return_val[key] = torch.ones(value).half().cuda() # type: ignore
82
+ elif info == "input_names":
83
+ return_val.append(key) # type: ignore
84
+
85
+ collect_leaf_keys(dummy_dict)
86
+ return return_val
87
+
88
+
89
+ def get_total_device_memory(backbone):
90
+ max_device_memory = 0
91
+ for _, engine in backbone.engines.items():
92
+ max_device_memory = max(max_device_memory, engine.engine.device_memory_size)
93
+ return max_device_memory
94
+
95
+
96
+ def load_engines(backbone, engine_path: Path, batch_size: int = 1):
97
+ backbone.engines = {}
98
+ for f in engine_path.iterdir():
99
+ if f.is_file():
100
+ eng = Engine()
101
+ eng.load(str(f))
102
+ backbone.engines[f"{f.stem}"] = eng
103
+ _, shared_device_memory = cudart.cudaMalloc(get_total_device_memory(backbone))
104
+ for engine in backbone.engines.values():
105
+ engine.activate(shared_device_memory)
106
+ backbone.cuda_stream = cudart.cudaStreamCreate()[1]
107
+ for block_name in backbone.engines.keys():
108
+ backbone.engines[block_name].allocate_buffers(
109
+ shape_dict=get_input_info(
110
+ ONNX_CONFIG[backbone.__class__][block_name]["dummy_input"],
111
+ "profile_shapes_dict",
112
+ batch_size,
113
+ ),
114
+ device=backbone.device,
115
+ batch_size=batch_size,
116
+ )
117
+ # TODO: Free and clean up the origin pytorch cuda memory
118
+
119
+
120
+ def warm_up(backbone, batch_size: int = 1):
121
+ print("Warming-up TensorRT engines...")
122
+ for name, engine in backbone.engines.items():
123
+ dummy_input = get_input_info(
124
+ ONNX_CONFIG[backbone.__class__][name]["dummy_input"], "dummy_input", batch_size
125
+ )
126
+ _ = engine(dummy_input, backbone.cuda_stream)
127
+
128
+
129
+ def teardown(pipe):
130
+ backbone = get_model(pipe)
131
+ for engine in backbone.engines.values():
132
+ del engine
133
+
134
+ cudart.cudaStreamDestroy(backbone.cuda_stream)
135
+ del backbone.cuda_stream
136
+
137
+
138
+ def load_unet_trt(unet, engine_path: Path, batch_size: int = 1):
139
+ backbone = unet
140
+ engine_path.mkdir(parents=True, exist_ok=True)
141
+ replace_new_forward(backbone)
142
+ load_engines(backbone, engine_path, batch_size)
143
+ warm_up(backbone, batch_size)
144
+ backbone.use_trt_infer = True
src/trt_pipeline/models/sd3.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a
5
+ # copy of this software and associated documentation files (the "Software"),
6
+ # to deal in the Software without restriction, including without limitation
7
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ # and/or sell copies of the Software, and to permit persons to whom the
9
+ # Software is furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
+ # DEALINGS IN THE SOFTWARE.
21
+
22
+ from typing import Any, Dict, List, Optional, Union
23
+
24
+ import torch
25
+ from diffusers.models.modeling_outputs import Transformer2DModelOutput
26
+ from diffusers.utils import (
27
+ USE_PEFT_BACKEND,
28
+ is_torch_version,
29
+ scale_lora_layers,
30
+ unscale_lora_layers,
31
+ )
32
+
33
+
34
+ def sd3_forward(
35
+ self,
36
+ hidden_states: torch.FloatTensor,
37
+ encoder_hidden_states: torch.FloatTensor = None,
38
+ pooled_projections: torch.FloatTensor = None,
39
+ timestep: torch.LongTensor = None,
40
+ block_controlnet_hidden_states: List = None,
41
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
42
+ return_dict: bool = True,
43
+ ) -> Union[torch.FloatTensor, Transformer2DModelOutput]:
44
+ """
45
+ The [`SD3Transformer2DModel`] forward method.
46
+
47
+ Args:
48
+ hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`):
49
+ Input `hidden_states`.
50
+ encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`):
51
+ Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
52
+ pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected
53
+ from the embeddings of input conditions.
54
+ timestep ( `torch.LongTensor`):
55
+ Used to indicate denoising step.
56
+ block_controlnet_hidden_states: (`list` of `torch.Tensor`):
57
+ A list of tensors that if specified are added to the residuals of transformer blocks.
58
+ joint_attention_kwargs (`dict`, *optional*):
59
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
60
+ `self.processor` in
61
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
62
+ return_dict (`bool`, *optional*, defaults to `True`):
63
+ Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
64
+ tuple.
65
+
66
+ Returns:
67
+ If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
68
+ `tuple` where the first element is the sample tensor.
69
+ """
70
+ if joint_attention_kwargs is not None:
71
+ joint_attention_kwargs = joint_attention_kwargs.copy()
72
+ lora_scale = joint_attention_kwargs.pop("scale", 1.0)
73
+ else:
74
+ lora_scale = 1.0
75
+
76
+ if USE_PEFT_BACKEND:
77
+ # weight the lora layers by setting `lora_scale` for each PEFT layer
78
+ scale_lora_layers(self, lora_scale)
79
+
80
+ height, width = hidden_states.shape[-2:]
81
+
82
+ hidden_states = self.pos_embed(hidden_states) # takes care of adding positional embeddings too.
83
+ temb = self.time_text_embed(timestep, pooled_projections)
84
+ encoder_hidden_states = self.context_embedder(encoder_hidden_states)
85
+
86
+ for index_block, block in enumerate(self.transformer_blocks):
87
+ if self.training and self.gradient_checkpointing:
88
+
89
+ def create_custom_forward(module, return_dict=None):
90
+ def custom_forward(*inputs):
91
+ if return_dict is not None:
92
+ return module(*inputs, return_dict=return_dict)
93
+ else:
94
+ return module(*inputs)
95
+
96
+ return custom_forward
97
+
98
+ ckpt_kwargs: Dict[str, Any] = (
99
+ {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
100
+ )
101
+ encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
102
+ create_custom_forward(block),
103
+ hidden_states,
104
+ encoder_hidden_states,
105
+ temb,
106
+ **ckpt_kwargs,
107
+ )
108
+
109
+ else:
110
+ if hasattr(self, "use_trt_infer") and self.use_trt_infer:
111
+ feed_dict = {
112
+ "hidden_states": hidden_states,
113
+ "encoder_hidden_states": encoder_hidden_states,
114
+ "temb": temb,
115
+ }
116
+ _results = self.engines[f"transformer_blocks.{index_block}"](
117
+ feed_dict, self.cuda_stream
118
+ )
119
+ if index_block != 23:
120
+ encoder_hidden_states = _results["encoder_hidden_states_out"]
121
+ hidden_states = _results["hidden_states_out"]
122
+ else:
123
+ encoder_hidden_states, hidden_states = block(
124
+ hidden_states=hidden_states,
125
+ encoder_hidden_states=encoder_hidden_states,
126
+ temb=temb,
127
+ )
128
+
129
+ # controlnet residual
130
+ if block_controlnet_hidden_states is not None and block.context_pre_only is False:
131
+ interval_control = len(self.transformer_blocks) // len(block_controlnet_hidden_states)
132
+ hidden_states = (
133
+ hidden_states + block_controlnet_hidden_states[index_block // interval_control]
134
+ )
135
+
136
+ hidden_states = self.norm_out(hidden_states, temb)
137
+ hidden_states = self.proj_out(hidden_states)
138
+
139
+ # unpatchify
140
+ patch_size = self.config.patch_size
141
+ height = height // patch_size
142
+ width = width // patch_size
143
+
144
+ hidden_states = hidden_states.reshape(
145
+ shape=(hidden_states.shape[0], height, width, patch_size, patch_size, self.out_channels)
146
+ )
147
+ hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states)
148
+ output = hidden_states.reshape(
149
+ shape=(hidden_states.shape[0], self.out_channels, height * patch_size, width * patch_size)
150
+ )
151
+
152
+ if USE_PEFT_BACKEND:
153
+ # remove `lora_scale` from each PEFT layer
154
+ unscale_lora_layers(self, lora_scale)
155
+
156
+ if not return_dict:
157
+ return (output,)
158
+
159
+ return Transformer2DModelOutput(sample=output)
src/trt_pipeline/models/sdxl.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adapted from
2
+ # https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_condition.py#L1039-L1312
3
+ # https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_blocks.py#L2482-L2564
4
+ # https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_blocks.py#L2617-L2679
5
+
6
+ # Copyright 2024 The HuggingFace Team. All rights reserved.
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ # See the License for the specific language governing permissions and
18
+ # limitations under the License.
19
+ #
20
+ # Not a contribution
21
+ # Changes made by NVIDIA CORPORATION & AFFILIATES or otherwise documented as
22
+ # NVIDIA-proprietary are not a contribution and subject to the following terms and conditions:
23
+ # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
24
+ # SPDX-License-Identifier: LicenseRef-NvidiaProprietary
25
+ #
26
+ # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
27
+ # property and proprietary rights in and to this material, related
28
+ # documentation and any modifications thereto. Any use, reproduction,
29
+ # disclosure or distribution of this material and related documentation
30
+ # without an express license agreement from NVIDIA CORPORATION or
31
+ # its affiliates is strictly prohibited.
32
+
33
+ from typing import Any, Dict, Optional, Tuple, Union
34
+
35
+ import torch
36
+ from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput
37
+
38
+
39
def cachecrossattnupblock2d_forward(
    self,
    hidden_states: torch.FloatTensor,
    res_hidden_states_0: torch.FloatTensor,
    res_hidden_states_1: torch.FloatTensor,
    res_hidden_states_2: torch.FloatTensor,
    temb: Optional[torch.FloatTensor] = None,
    encoder_hidden_states: Optional[torch.FloatTensor] = None,
    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
    upsample_size: Optional[int] = None,
    attention_mask: Optional[torch.FloatTensor] = None,
    encoder_attention_mask: Optional[torch.FloatTensor] = None,
) -> torch.FloatTensor:
    """Export-friendly CrossAttnUpBlock2D forward taking the three skip tensors
    as explicit positional arguments instead of a tuple (ONNX/TRT cannot trace
    a Python tuple input).

    Skips are consumed in reverse order: ``res_hidden_states_2`` feeds the
    first resnet/attention pair, ``res_hidden_states_0`` the last.
    """
    # Mutable stack of skip connections; pop() yields the last one first,
    # mirroring the tuple-slicing of the upstream diffusers implementation.
    skip_stack = [res_hidden_states_0, res_hidden_states_1, res_hidden_states_2]

    for resnet, attn in zip(self.resnets, self.attentions):
        skip = skip_stack.pop()
        # Channel-wise concat of the upsampled features with the skip tensor.
        merged = torch.cat((hidden_states, skip), dim=1)
        hidden_states = resnet(merged, temb)
        hidden_states = attn(
            hidden_states,
            encoder_hidden_states=encoder_hidden_states,
            cross_attention_kwargs=cross_attention_kwargs,
            attention_mask=attention_mask,
            encoder_attention_mask=encoder_attention_mask,
            return_dict=False,
        )[0]

    if self.upsamplers is not None:
        for upsampler in self.upsamplers:
            hidden_states = upsampler(hidden_states, upsample_size)

    return hidden_states
75
+
76
+
77
def cacheupblock2d_forward(
    self,
    hidden_states: torch.FloatTensor,
    res_hidden_states_0: torch.FloatTensor,
    res_hidden_states_1: torch.FloatTensor,
    res_hidden_states_2: torch.FloatTensor,
    temb: Optional[torch.FloatTensor] = None,
    upsample_size: Optional[int] = None,
) -> torch.FloatTensor:
    """Export-friendly UpBlock2D forward with the three skip tensors passed
    as explicit positional arguments (tuple inputs are not traceable for
    ONNX/TRT export).

    Skips are consumed in reverse order: ``res_hidden_states_2`` is used by
    the first resnet, ``res_hidden_states_0`` by the last.
    """
    skip_stack = [res_hidden_states_0, res_hidden_states_1, res_hidden_states_2]

    for resnet in self.resnets:
        # Pop the most recent skip connection and fuse it channel-wise.
        skip = skip_stack.pop()
        hidden_states = resnet(torch.cat((hidden_states, skip), dim=1), temb)

    if self.upsamplers is not None:
        for upsampler in self.upsamplers:
            hidden_states = upsampler(hidden_states, upsample_size)

    return hidden_states
101
+
102
+
103
def cacheunet_forward(
    self,
    sample: torch.FloatTensor,
    timestep: Union[torch.Tensor, float, int],
    encoder_hidden_states: torch.Tensor,
    class_labels: Optional[torch.Tensor] = None,
    timestep_cond: Optional[torch.Tensor] = None,
    attention_mask: Optional[torch.Tensor] = None,
    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
    added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
    down_block_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
    mid_block_additional_residual: Optional[torch.Tensor] = None,
    down_intrablock_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
    encoder_attention_mask: Optional[torch.Tensor] = None,
    return_dict: bool = True,
) -> Union[UNet2DConditionOutput, Tuple]:
    """Simplified UNet2DConditionModel.forward that can route each
    down/mid/up block either through the original PyTorch module or through a
    per-block TensorRT engine (``self.engines``) when ``self.use_trt_infer``
    is set.

    Compared to the upstream diffusers forward, several arguments
    (``class_labels``, ``down_block_additional_residuals``,
    ``mid_block_additional_residual``,
    ``down_intrablock_additional_residuals``) are accepted for signature
    compatibility but never read in this body.

    Returns a ``UNet2DConditionOutput`` (or a plain tuple when
    ``return_dict=False``); in ONNX-export mode it instead returns the
    pre-processed ``(sample, encoder_hidden_states, emb)`` triple.
    """
    # 1. time: build the timestep embedding, then add the auxiliary
    # (e.g. SDXL text/time-id) embedding produced from added_cond_kwargs.
    t_emb = self.get_time_embed(sample=sample, timestep=timestep)
    emb = self.time_embedding(t_emb, timestep_cond)
    aug_emb = None

    aug_emb = self.get_aug_embed(
        emb=emb,
        encoder_hidden_states=encoder_hidden_states,
        added_cond_kwargs=added_cond_kwargs,
    )

    emb = emb + aug_emb if aug_emb is not None else emb

    encoder_hidden_states = self.process_encoder_hidden_states(
        encoder_hidden_states=encoder_hidden_states, added_cond_kwargs=added_cond_kwargs
    )

    # 2. pre-process
    sample = self.conv_in(sample)

    # Early exit used when exporting the pre-processing stage to ONNX:
    # the three tensors become the graph outputs.  NOTE(review): flag name
    # "_export_precess_onnx" looks like a typo of "preprocess" — kept as-is
    # because external code sets this exact attribute.
    if hasattr(self, "_export_precess_onnx") and self._export_precess_onnx:
        return (
            sample,
            encoder_hidden_states,
            emb,
        )

    # 3./4. down: collect residuals for the skip connections consumed by
    # the up blocks.  The conv_in output itself is the first residual.
    down_block_res_samples = (sample,)
    for i, downsample_block in enumerate(self.down_blocks):
        if (
            hasattr(downsample_block, "has_cross_attention")
            and downsample_block.has_cross_attention
        ):
            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
                # TRT path: engine outputs the block result plus 2-3 named
                # residual tensors (res_samples_2 only for blocks that have
                # a downsampler — TODO confirm against engine export).
                feed_dict = {
                    "hidden_states": sample,
                    "temb": emb,
                    "encoder_hidden_states": encoder_hidden_states,
                }
                down_results = self.engines[f"down_blocks.{i}"](feed_dict, self.cuda_stream)
                sample = down_results["sample"]
                res_samples_0 = down_results["res_samples_0"]
                res_samples_1 = down_results["res_samples_1"]
                if "res_samples_2" in down_results.keys():
                    res_samples_2 = down_results["res_samples_2"]
            else:
                # For t2i-adapter CrossAttnDownBlock2D
                # (kept empty here: intrablock residuals are not supported
                # by this simplified forward).
                additional_residuals = {}

                sample, res_samples = downsample_block(
                    hidden_states=sample,
                    temb=emb,
                    encoder_hidden_states=encoder_hidden_states,
                    attention_mask=attention_mask,
                    cross_attention_kwargs=cross_attention_kwargs,
                    encoder_attention_mask=encoder_attention_mask,
                    **additional_residuals,
                )
        else:
            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
                feed_dict = {"hidden_states": sample, "temb": emb}
                down_results = self.engines[f"down_blocks.{i}"](feed_dict, self.cuda_stream)
                sample = down_results["sample"]
                res_samples_0 = down_results["res_samples_0"]
                res_samples_1 = down_results["res_samples_1"]
                if "res_samples_2" in down_results.keys():
                    res_samples_2 = down_results["res_samples_2"]
            else:
                sample, res_samples = downsample_block(hidden_states=sample, temb=emb)

        # Append this block's residuals; the TRT path rebuilds the tuple
        # from the individually named engine outputs.
        if hasattr(self, "use_trt_infer") and self.use_trt_infer:
            down_block_res_samples += (
                res_samples_0,
                res_samples_1,
            )
            if "res_samples_2" in down_results.keys():
                down_block_res_samples += (res_samples_2,)
        else:
            down_block_res_samples += res_samples

    # 4. mid
    if hasattr(self, "use_trt_infer") and self.use_trt_infer:
        feed_dict = {
            "hidden_states": sample,
            "temb": emb,
            "encoder_hidden_states": encoder_hidden_states,
        }
        mid_results = self.engines["mid_block"](feed_dict, self.cuda_stream)
        sample = mid_results["sample"]
    else:
        sample = self.mid_block(
            sample,
            emb,
            encoder_hidden_states=encoder_hidden_states,
            attention_mask=attention_mask,
            cross_attention_kwargs=cross_attention_kwargs,
            encoder_attention_mask=encoder_attention_mask,
        )

    # 5. up: each up block consumes the last len(resnets) residuals.
    # NOTE(review): both paths pass exactly three skip tensors, so this
    # forward assumes every up block has 3 resnets (true for SDXL).
    for i, upsample_block in enumerate(self.up_blocks):
        res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
        down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]

        if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
                feed_dict = {
                    "hidden_states": sample,
                    "res_hidden_states_0": res_samples[0],
                    "res_hidden_states_1": res_samples[1],
                    "res_hidden_states_2": res_samples[2],
                    "temb": emb,
                    "encoder_hidden_states": encoder_hidden_states,
                }
                up_results = self.engines[f"up_blocks.{i}"](feed_dict, self.cuda_stream)
                sample = up_results["sample"]
            else:
                sample = upsample_block(
                    hidden_states=sample,
                    temb=emb,
                    res_hidden_states_0=res_samples[0],
                    res_hidden_states_1=res_samples[1],
                    res_hidden_states_2=res_samples[2],
                    encoder_hidden_states=encoder_hidden_states,
                    cross_attention_kwargs=cross_attention_kwargs,
                    attention_mask=attention_mask,
                    encoder_attention_mask=encoder_attention_mask,
                )
        else:
            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
                feed_dict = {
                    "hidden_states": sample,
                    "res_hidden_states_0": res_samples[0],
                    "res_hidden_states_1": res_samples[1],
                    "res_hidden_states_2": res_samples[2],
                    "temb": emb,
                }
                up_results = self.engines[f"up_blocks.{i}"](feed_dict, self.cuda_stream)
                sample = up_results["sample"]
            else:
                sample = upsample_block(
                    hidden_states=sample,
                    temb=emb,
                    res_hidden_states_0=res_samples[0],
                    res_hidden_states_1=res_samples[1],
                    res_hidden_states_2=res_samples[2],
                )

    # 6. post-process
    if self.conv_norm_out:
        sample = self.conv_norm_out(sample)
        sample = self.conv_act(sample)
    sample = self.conv_out(sample)

    if not return_dict:
        return (sample,)

    return UNet2DConditionOutput(sample=sample)
src/trt_pipeline/utils.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a
5
+ # copy of this software and associated documentation files (the "Software"),
6
+ # to deal in the Software without restriction, including without limitation
7
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ # and/or sell copies of the Software, and to permit persons to whom the
9
+ # Software is furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
+ # DEALINGS IN THE SOFTWARE.
21
+
22
+ from collections import OrderedDict
23
+
24
+ import numpy as np
25
+ import tensorrt as trt
26
+ import torch
27
+ from cuda import cudart
28
+ from polygraphy.backend.common import bytes_from_path
29
+ from polygraphy.backend.trt import engine_from_bytes
30
+
31
# Maps NumPy scalar types (as returned by trt.nptype for a TensorRT binding)
# to the corresponding torch dtypes, used when allocating torch buffers for
# engine I/O tensors.
numpy_to_torch_dtype_dict = {
    np.uint8: torch.uint8,
    np.int8: torch.int8,
    np.int16: torch.int16,
    np.int32: torch.int32,
    np.int64: torch.int64,
    np.float16: torch.float16,
    np.float32: torch.float32,
    np.float64: torch.float64,
    np.complex64: torch.complex64,
    np.complex128: torch.complex128,
}
43
+
44
+
45
class Engine:
    """Thin runtime wrapper around a serialized TensorRT engine.

    Lifecycle: ``load()`` deserializes the plan file, ``activate()`` creates
    an execution context, ``allocate_buffers()`` pre-allocates torch tensors
    for every I/O binding, and ``__call__`` runs inference (optionally via a
    captured CUDA graph).
    """

    def __init__(
        self,
    ):
        # Deserialized trt.ICudaEngine (set by load()).
        self.engine = None
        # trt.IExecutionContext (set by activate()).
        self.context = None
        self.buffers = OrderedDict()
        # name -> pre-allocated torch tensor for each engine I/O binding.
        self.tensors = OrderedDict()
        self.cuda_graph_instance = None  # cuda graph
        # Consulted by callers to decide which inputs to feed this engine.
        self.has_cross_attention = False

    def __del__(self):
        # Drop references so TRT/torch resources can be released promptly.
        del self.engine
        del self.context
        del self.buffers
        del self.tensors

    def load(self, engine_path):
        # Deserialize the engine plan file from disk.
        self.engine = engine_from_bytes(bytes_from_path(engine_path))

    def activate(self, reuse_device_memory=None):
        # When reuse_device_memory is given, share one activation-memory
        # arena across engines instead of letting each context allocate its own.
        if reuse_device_memory:
            self.context = self.engine.create_execution_context_without_device_memory()  # type: ignore
            self.context.device_memory = reuse_device_memory
        else:
            self.context = self.engine.create_execution_context()  # type: ignore

    def allocate_buffers(self, shape_dict=None, device="cuda", batch_size=1):
        # Pre-allocate one torch tensor per I/O binding; shapes come from
        # shape_dict when provided, otherwise from the engine profile with
        # the batch dimension overridden to batch_size * 2
        # (presumably cond + uncond for classifier-free guidance — TODO confirm).
        for binding in range(self.engine.num_io_tensors):  # type: ignore
            name = self.engine.get_tensor_name(binding)  # type: ignore
            if shape_dict and name in shape_dict:
                shape = shape_dict[name]
            else:
                shape = self.engine.get_tensor_shape(name)  # type: ignore
                shape = (batch_size * 2,) + shape[1:]
            dtype = trt.nptype(self.engine.get_tensor_dtype(name))  # type: ignore
            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:  # type: ignore
                self.context.set_input_shape(name, shape)  # type: ignore
            tensor = torch.empty(tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]).to(
                device=device
            )
            self.tensors[name] = tensor

    def __call__(self, feed_dict, stream, use_cuda_graph=False):
        # Copy inputs into the pre-allocated buffers (keeps device addresses
        # stable, which is required for CUDA graph replay).
        for name, buf in feed_dict.items():
            self.tensors[name].copy_(buf)

        for name, tensor in self.tensors.items():
            self.context.set_tensor_address(name, tensor.data_ptr())  # type: ignore

        if use_cuda_graph:
            if self.cuda_graph_instance is not None:
                # Replay the previously captured graph.
                cuassert(cudart.cudaGraphLaunch(self.cuda_graph_instance, stream))
                cuassert(cudart.cudaStreamSynchronize(stream))
            else:
                # do inference before CUDA graph capture
                noerror = self.context.execute_async_v3(stream)  # type: ignore
                if not noerror:
                    raise ValueError("ERROR: inference failed.")
                # capture cuda graph
                cuassert(
                    cudart.cudaStreamBeginCapture(
                        stream, cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal
                    )
                )
                self.context.execute_async_v3(stream)  # type: ignore
                self.graph = cuassert(cudart.cudaStreamEndCapture(stream))
                self.cuda_graph_instance = cuassert(cudart.cudaGraphInstantiate(self.graph, 0))
        else:
            noerror = self.context.execute_async_v3(stream)  # type: ignore
            if not noerror:
                raise ValueError("ERROR: inference failed.")

        # Outputs are read from the same shared buffers; callers must copy
        # them out before the next __call__ overwrites them.
        return self.tensors
119
+
120
+
121
def cuassert(cuda_ret):
    """Unwrap a cuda-python runtime call result.

    cuda-python cudart calls return a tuple whose first element is the
    status code; raise RuntimeError on any non-success status and return the
    payload (second element) when present, else None.
    """
    status = cuda_ret[0]
    if status != cudart.cudaError_t.cudaSuccess:
        raise RuntimeError(
            f"CUDA ERROR: {status}, error code reference: https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaError_t"
        )
    return cuda_ret[1] if len(cuda_ret) > 1 else None
uv.lock CHANGED
@@ -77,17 +77,13 @@ wheels = [
77
  ]
78
 
79
  [[package]]
80
- name = "deepcache"
81
- version = "0.1.1"
82
  source = { registry = "https://pypi.org/simple" }
83
- dependencies = [
84
- { name = "diffusers" },
85
- { name = "torch" },
86
- { name = "transformers" },
87
- ]
88
- sdist = { url = "https://files.pythonhosted.org/packages/97/f4/499a3bbe535e2d3612b5d0d44e94c80498856f99ae4b57d02da2a4128281/DeepCache-0.1.1.tar.gz", hash = "sha256:8bc995d8c0ee7f3eb51ca080c951916bf0eb044ebdc75215b1753621ac8f80e6", size = 190065 }
89
  wheels = [
90
- { url = "https://files.pythonhosted.org/packages/8e/40/f7024b19494d5cbf0ae85e04da2fed973690af971edfbfd181573b3a6b34/DeepCache-0.1.1-py3-none-any.whl", hash = "sha256:aa4aa5f8e9a2e5a41d59900305c3c86c20834537d0c67bf2d8bb7075618ae48a", size = 190872 },
 
 
91
  ]
92
 
93
  [[package]]
@@ -115,23 +111,37 @@ version = "6"
115
  source = { editable = "." }
116
  dependencies = [
117
  { name = "accelerate" },
118
- { name = "deepcache" },
119
  { name = "diffusers" },
120
  { name = "edge-maxxing-pipelines" },
121
  { name = "omegaconf" },
 
 
 
 
 
 
122
  { name = "torch" },
123
  { name = "transformers" },
 
124
  ]
125
 
126
  [package.metadata]
127
  requires-dist = [
128
  { name = "accelerate", specifier = "==0.31.0" },
129
- { name = "deepcache" },
130
  { name = "diffusers", specifier = "==0.30.2" },
131
  { name = "edge-maxxing-pipelines", git = "https://github.com/womboai/edge-maxxing?subdirectory=pipelines&rev=8d8ff45863416484b5b4bc547782591bbdfc696a#8d8ff45863416484b5b4bc547782591bbdfc696a" },
132
  { name = "omegaconf", specifier = "==2.3.0" },
 
 
 
 
 
 
133
  { name = "torch", specifier = "==2.4.1" },
134
  { name = "transformers", specifier = "==4.41.2" },
 
135
  ]
136
 
137
  [[package]]
@@ -299,6 +309,7 @@ version = "12.1.105"
299
  source = { registry = "https://pypi.org/simple" }
300
  wheels = [
301
  { url = "https://files.pythonhosted.org/packages/eb/d5/c68b1d2cdfcc59e72e8a5949a37ddb22ae6cade80cd4a57a84d4c8b55472/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40", size = 823596 },
 
302
  ]
303
 
304
  [[package]]
@@ -391,6 +402,23 @@ wheels = [
391
  { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500 },
392
  ]
393
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  [[package]]
395
  name = "packaging"
396
  version = "24.1"
@@ -426,6 +454,28 @@ wheels = [
426
  { url = "https://files.pythonhosted.org/packages/ec/3d/c32a51d848401bd94cabb8767a39621496491ee7cd5199856b77da9b18ad/pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316", size = 2567508 },
427
  ]
428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  [[package]]
430
  name = "psutil"
431
  version = "6.1.0"
@@ -569,6 +619,15 @@ wheels = [
569
  { url = "https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 },
570
  ]
571
 
 
 
 
 
 
 
 
 
 
572
  [[package]]
573
  name = "sympy"
574
  version = "1.13.3"
@@ -581,6 +640,40 @@ wheels = [
581
  { url = "https://files.pythonhosted.org/packages/99/ff/c87e0622b1dadea79d2fb0b25ade9ed98954c9033722eb707053d310d4f3/sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73", size = 6189483 },
582
  ]
583
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  [[package]]
585
  name = "tokenizers"
586
  version = "0.19.1"
@@ -704,6 +797,15 @@ wheels = [
704
  { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 },
705
  ]
706
 
 
 
 
 
 
 
 
 
 
707
  [[package]]
708
  name = "zipp"
709
  version = "3.20.2"
 
77
  ]
78
 
79
  [[package]]
80
+ name = "cuda-python"
81
+ version = "12.6.0"
82
  source = { registry = "https://pypi.org/simple" }
 
 
 
 
 
 
83
  wheels = [
84
+ { url = "https://files.pythonhosted.org/packages/0b/a3/ad3148d068d78e8ad1e40094ab787338ea4bef06fbe2915cf1557a5c5f98/cuda_python-12.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dee03e2ba78a807a907a7939dddf089bb8a780faaf7ccbcbfc2461090af11e78", size = 23793330 },
85
+ { url = "https://files.pythonhosted.org/packages/86/93/f00a5f48eb67216d8a8818b93c0e8bbe5949f297add3367522081ec5223c/cuda_python-12.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e177f584094d9c9fd9c7d153168486a3966765c79cb2a80e86feb15e3b5adc14", size = 24223726 },
86
+ { url = "https://files.pythonhosted.org/packages/f6/e0/c2302ff6796eac6c6f1e1414f163c6a38deba62af0b7df2b77562656188c/cuda_python-12.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:3b1e9711c6455fabd947076d52eb21ea508ade95eb4dd33838b0339a84238125", size = 9995130 },
87
  ]
88
 
89
  [[package]]
 
111
  source = { editable = "." }
112
  dependencies = [
113
  { name = "accelerate" },
114
+ { name = "cuda-python" },
115
  { name = "diffusers" },
116
  { name = "edge-maxxing-pipelines" },
117
  { name = "omegaconf" },
118
+ { name = "onnx" },
119
+ { name = "polygraphy" },
120
+ { name = "setuptools" },
121
+ { name = "tensorrt" },
122
+ { name = "tensorrt-cu12-bindings" },
123
+ { name = "tensorrt-cu12-libs" },
124
  { name = "torch" },
125
  { name = "transformers" },
126
+ { name = "wheel" },
127
  ]
128
 
129
  [package.metadata]
130
  requires-dist = [
131
  { name = "accelerate", specifier = "==0.31.0" },
132
+ { name = "cuda-python", specifier = ">=12.6.0" },
133
  { name = "diffusers", specifier = "==0.30.2" },
134
  { name = "edge-maxxing-pipelines", git = "https://github.com/womboai/edge-maxxing?subdirectory=pipelines&rev=8d8ff45863416484b5b4bc547782591bbdfc696a#8d8ff45863416484b5b4bc547782591bbdfc696a" },
135
  { name = "omegaconf", specifier = "==2.3.0" },
136
+ { name = "onnx" },
137
+ { name = "polygraphy" },
138
+ { name = "setuptools", specifier = ">=75.2.0" },
139
+ { name = "tensorrt", specifier = ">=10.5.0" },
140
+ { name = "tensorrt-cu12-bindings", specifier = ">=10.5.0" },
141
+ { name = "tensorrt-cu12-libs", specifier = ">=10.5.0" },
142
  { name = "torch", specifier = "==2.4.1" },
143
  { name = "transformers", specifier = "==4.41.2" },
144
+ { name = "wheel" },
145
  ]
146
 
147
  [[package]]
 
309
  source = { registry = "https://pypi.org/simple" }
310
  wheels = [
311
  { url = "https://files.pythonhosted.org/packages/eb/d5/c68b1d2cdfcc59e72e8a5949a37ddb22ae6cade80cd4a57a84d4c8b55472/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40", size = 823596 },
312
+ { url = "https://files.pythonhosted.org/packages/9f/e2/7a2b4b5064af56ea8ea2d8b2776c0f2960d95c88716138806121ae52a9c9/nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344", size = 821226 },
313
  ]
314
 
315
  [[package]]
 
402
  { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500 },
403
  ]
404
 
405
+ [[package]]
406
+ name = "onnx"
407
+ version = "1.17.0"
408
+ source = { registry = "https://pypi.org/simple" }
409
+ dependencies = [
410
+ { name = "numpy" },
411
+ { name = "protobuf" },
412
+ ]
413
+ sdist = { url = "https://files.pythonhosted.org/packages/9a/54/0e385c26bf230d223810a9c7d06628d954008a5e5e4b73ee26ef02327282/onnx-1.17.0.tar.gz", hash = "sha256:48ca1a91ff73c1d5e3ea2eef20ae5d0e709bb8a2355ed798ffc2169753013fd3", size = 12165120 }
414
+ wheels = [
415
+ { url = "https://files.pythonhosted.org/packages/2e/29/57053ba7787788ac75efb095cfc1ae290436b6d3a26754693cd7ed1b4fac/onnx-1.17.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:38b5df0eb22012198cdcee527cc5f917f09cce1f88a69248aaca22bd78a7f023", size = 16645616 },
416
+ { url = "https://files.pythonhosted.org/packages/75/0d/831807a18db2a5e8f7813848c59272b904a4ef3939fe4d1288cbce9ea735/onnx-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d545335cb49d4d8c47cc803d3a805deb7ad5d9094dc67657d66e568610a36d7d", size = 15908420 },
417
+ { url = "https://files.pythonhosted.org/packages/dd/5b/c4f95dbe652d14aeba9afaceb177e9ffc48ac3c03048dd3f872f26f07e34/onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3193a3672fc60f1a18c0f4c93ac81b761bc72fd8a6c2035fa79ff5969f07713e", size = 16046244 },
418
+ { url = "https://files.pythonhosted.org/packages/08/a9/c1f218085043dccc6311460239e253fa6957cf12ee4b0a56b82014938d0b/onnx-1.17.0-cp310-cp310-win32.whl", hash = "sha256:0141c2ce806c474b667b7e4499164227ef594584da432fd5613ec17c1855e311", size = 14423516 },
419
+ { url = "https://files.pythonhosted.org/packages/0e/d3/d26ebf590a65686dde6b27fef32493026c5be9e42083340d947395f93405/onnx-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:dfd777d95c158437fda6b34758f0877d15b89cbe9ff45affbedc519b35345cf9", size = 14528496 },
420
+ ]
421
+
422
  [[package]]
423
  name = "packaging"
424
  version = "24.1"
 
454
  { url = "https://files.pythonhosted.org/packages/ec/3d/c32a51d848401bd94cabb8767a39621496491ee7cd5199856b77da9b18ad/pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316", size = 2567508 },
455
  ]
456
 
457
+ [[package]]
458
+ name = "polygraphy"
459
+ version = "0.49.9"
460
+ source = { registry = "https://pypi.org/simple" }
461
+ wheels = [
462
+ { url = "https://files.pythonhosted.org/packages/4a/f5/a2b20c677c1a856cc9e08cd0b5a5105450ed5253e369e938ddd31d91c547/polygraphy-0.49.9-py2.py3-none-any.whl", hash = "sha256:62ae22825efdd3288222e5b1d2d791fe58e87844fcd848bcd1251fbce02ba956", size = 346910 },
463
+ ]
464
+
465
+ [[package]]
466
+ name = "protobuf"
467
+ version = "5.28.3"
468
+ source = { registry = "https://pypi.org/simple" }
469
+ sdist = { url = "https://files.pythonhosted.org/packages/74/6e/e69eb906fddcb38f8530a12f4b410699972ab7ced4e21524ece9d546ac27/protobuf-5.28.3.tar.gz", hash = "sha256:64badbc49180a5e401f373f9ce7ab1d18b63f7dd4a9cdc43c92b9f0b481cef7b", size = 422479 }
470
+ wheels = [
471
+ { url = "https://files.pythonhosted.org/packages/d1/c5/05163fad52d7c43e124a545f1372d18266db36036377ad29de4271134a6a/protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24", size = 419624 },
472
+ { url = "https://files.pythonhosted.org/packages/9c/4c/4563ebe001ff30dca9d7ed12e471fa098d9759712980cde1fd03a3a44fb7/protobuf-5.28.3-cp310-abi3-win_amd64.whl", hash = "sha256:91fba8f445723fcf400fdbe9ca796b19d3b1242cd873907979b9ed71e4afe868", size = 431464 },
473
+ { url = "https://files.pythonhosted.org/packages/1c/f2/baf397f3dd1d3e4af7e3f5a0382b868d25ac068eefe1ebde05132333436c/protobuf-5.28.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a3f6857551e53ce35e60b403b8a27b0295f7d6eb63d10484f12bc6879c715687", size = 414743 },
474
+ { url = "https://files.pythonhosted.org/packages/85/50/cd61a358ba1601f40e7d38bcfba22e053f40ef2c50d55b55926aecc8fec7/protobuf-5.28.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:3fa2de6b8b29d12c61911505d893afe7320ce7ccba4df913e2971461fa36d584", size = 316511 },
475
+ { url = "https://files.pythonhosted.org/packages/5d/ae/3257b09328c0b4e59535e497b0c7537d4954038bdd53a2f0d2f49d15a7c4/protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:712319fbdddb46f21abb66cd33cb9e491a5763b2febd8f228251add221981135", size = 316624 },
476
+ { url = "https://files.pythonhosted.org/packages/ad/c3/2377c159e28ea89a91cf1ca223f827ae8deccb2c9c401e5ca233cd73002f/protobuf-5.28.3-py3-none-any.whl", hash = "sha256:cee1757663fa32a1ee673434fcf3bf24dd54763c79690201208bafec62f19eed", size = 169511 },
477
+ ]
478
+
479
  [[package]]
480
  name = "psutil"
481
  version = "6.1.0"
 
619
  { url = "https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 },
620
  ]
621
 
622
+ [[package]]
623
+ name = "setuptools"
624
+ version = "75.2.0"
625
+ source = { registry = "https://pypi.org/simple" }
626
+ sdist = { url = "https://files.pythonhosted.org/packages/07/37/b31be7e4b9f13b59cde9dcaeff112d401d49e0dc5b37ed4a9fc8fb12f409/setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec", size = 1350308 }
627
+ wheels = [
628
+ { url = "https://files.pythonhosted.org/packages/31/2d/90165d51ecd38f9a02c6832198c13a4e48652485e2ccf863ebb942c531b6/setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8", size = 1249825 },
629
+ ]
630
+
631
  [[package]]
632
  name = "sympy"
633
  version = "1.13.3"
 
640
  { url = "https://files.pythonhosted.org/packages/99/ff/c87e0622b1dadea79d2fb0b25ade9ed98954c9033722eb707053d310d4f3/sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73", size = 6189483 },
641
  ]
642
 
643
+ [[package]]
644
+ name = "tensorrt"
645
+ version = "10.5.0"
646
+ source = { registry = "https://pypi.org/simple" }
647
+ dependencies = [
648
+ { name = "tensorrt-cu12" },
649
+ ]
650
+ sdist = { url = "https://files.pythonhosted.org/packages/ee/b9/f917eb7dfe02da30bc91206a464c850f4b94a1e14b8f95870074c9b9abea/tensorrt-10.5.0.tar.gz", hash = "sha256:d5c6338d44aeda20250fdbe31f9df8ca152b830f811aaf19d6c4d1dafd18c84b", size = 16401 }
651
+
652
+ [[package]]
653
+ name = "tensorrt-cu12"
654
+ version = "10.5.0"
655
+ source = { registry = "https://pypi.org/simple" }
656
+ sdist = { url = "https://files.pythonhosted.org/packages/22/d5/a4c3e22482d4273e151123990934d7c8d0ba1e4efb9a483eba807cdce279/tensorrt-cu12-10.5.0.tar.gz", hash = "sha256:46edbda08c54c8ffa88c75d75b4761eb9839e81678135e8d1530adc8cef6a61b", size = 18341 }
657
+
658
+ [[package]]
659
+ name = "tensorrt-cu12-bindings"
660
+ version = "10.5.0"
661
+ source = { registry = "https://pypi.org/simple" }
662
+ wheels = [
663
+ { url = "https://files.pythonhosted.org/packages/21/be/cab39a2c387887fa87bb8f199d113a10ebd0ba8b052927c2ae43b1495cf6/tensorrt_cu12_bindings-10.5.0-cp310-none-manylinux_2_17_x86_64.whl", hash = "sha256:45a31cc3f25489bb05fc9cb8dae0e63b205bf3da1656c44430f97cf263d5720c", size = 1117215 },
664
+ { url = "https://files.pythonhosted.org/packages/02/49/36db3b3c0bd0c7dc68964c75b1691b46abe8388708b4da04c3261f8ab7c0/tensorrt_cu12_bindings-10.5.0-cp310-none-manylinux_2_31_aarch64.whl", hash = "sha256:900b87824ebbc9e1059a4a9a5ed3040eb9d74ba9a601674086030d373996692a", size = 1091646 },
665
+ { url = "https://files.pythonhosted.org/packages/17/df/e95a92fa4d43df918cc8bc681697b1423a988db339af25bafe25068c522d/tensorrt_cu12_bindings-10.5.0-cp310-none-win_amd64.whl", hash = "sha256:2bf2eb6d36ed9fe44a4b416def538775012abec34fdb5a6fb8461dd569717055", size = 769305 },
666
+ ]
667
+
668
+ [[package]]
669
+ name = "tensorrt-cu12-libs"
670
+ version = "10.5.0"
671
+ source = { registry = "https://pypi.org/simple" }
672
+ dependencies = [
673
+ { name = "nvidia-cuda-runtime-cu12" },
674
+ ]
675
+ sdist = { url = "https://files.pythonhosted.org/packages/ff/d2/28d4bdadcb4690e7c051ae23ac5559dffca7ee6bf859ea76c9ab9931ba53/tensorrt_cu12_libs-10.5.0.tar.gz", hash = "sha256:358b3a36c30ab74ad710f227b410206ae94e8d1003c09b75216e39813dac0d9d", size = 630 }
676
+
677
  [[package]]
678
  name = "tokenizers"
679
  version = "0.19.1"
 
797
  { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 },
798
  ]
799
 
800
+ [[package]]
801
+ name = "wheel"
802
+ version = "0.44.0"
803
+ source = { registry = "https://pypi.org/simple" }
804
+ sdist = { url = "https://files.pythonhosted.org/packages/b7/a0/95e9e962c5fd9da11c1e28aa4c0d8210ab277b1ada951d2aee336b505813/wheel-0.44.0.tar.gz", hash = "sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49", size = 100733 }
805
+ wheels = [
806
+ { url = "https://files.pythonhosted.org/packages/1b/d1/9babe2ccaecff775992753d8686970b1e2755d21c8a63be73aba7a4e7d77/wheel-0.44.0-py3-none-any.whl", hash = "sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f", size = 67059 },
807
+ ]
808
+
809
  [[package]]
810
  name = "zipp"
811
  version = "3.20.2"