diff --git a/loss_params.pth b/loss_params.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9fc2cfc6dc7bb8d4e6eab51ba6ebd96c163dca11
--- /dev/null
+++ b/loss_params.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e4c687fb455b7495e325d5f1761391d281323de6d2a493b153a3dac9536664e
+size 3120
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.6.norm3.weight b/onnx/up_blocks.0/attentions.1.transformer_blocks.6.norm3.weight
new file mode 100644
index 0000000000000000000000000000000000000000..e54789179622edc0d02162a4780a5b59a58b96f5
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.6.norm3.weight differ
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.7.attn1.to_out.0.bias b/onnx/up_blocks.0/attentions.1.transformer_blocks.7.attn1.to_out.0.bias
new file mode 100644
index 0000000000000000000000000000000000000000..c73319ebfbc2c76def06c85255d94ff913fab1de
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.7.attn1.to_out.0.bias differ
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm1.bias b/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm1.bias
new file mode 100644
index 0000000000000000000000000000000000000000..f381f72c4c8e0ea867a219e4df2bf83c1d3e29ac
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm1.bias differ
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm1.weight b/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm1.weight
new file mode 100644
index 0000000000000000000000000000000000000000..24eb2a85be6cc626b289d1dd9097982ea80a6a62
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm1.weight differ
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm3.weight b/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm3.weight
new file mode 100644
index 0000000000000000000000000000000000000000..9bbb0492a60af22de563c06888817cbaed6695a3
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.7.norm3.weight differ
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.8.ff.net.2.bias b/onnx/up_blocks.0/attentions.1.transformer_blocks.8.ff.net.2.bias
new file mode 100644
index 0000000000000000000000000000000000000000..91d99d19912b30ab617dd3efa6f403c2efc262cf
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.8.ff.net.2.bias differ
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.8.norm1.weight b/onnx/up_blocks.0/attentions.1.transformer_blocks.8.norm1.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6c3b66d2a385eae6a08099767467bb8687522928
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.8.norm1.weight differ
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.9.ff.net.0.proj.bias b/onnx/up_blocks.0/attentions.1.transformer_blocks.9.ff.net.0.proj.bias
new file mode 100644
index 0000000000000000000000000000000000000000..98d954d04b22fd98ff8cb03ab974e27a8f3b31f4
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.9.ff.net.0.proj.bias differ
diff --git a/onnx/up_blocks.0/attentions.1.transformer_blocks.9.norm3.bias b/onnx/up_blocks.0/attentions.1.transformer_blocks.9.norm3.bias
new file mode 100644
index 0000000000000000000000000000000000000000..11544b4e42a08a5c66cbcc4b5e614a93aeb531b2
Binary files /dev/null and b/onnx/up_blocks.0/attentions.1.transformer_blocks.9.norm3.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.4.norm2.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.4.norm2.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6f0b3723e5a669498e699362e4102dccc2c78ac0
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.4.norm2.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.6.attn1.to_out.0.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.attn1.to_out.0.bias
new file mode 100644
index 0000000000000000000000000000000000000000..b286c97b82f8090b735fd36d685ea5c9d4d74a7f
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.attn1.to_out.0.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.6.ff.net.0.proj.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.ff.net.0.proj.bias
new file mode 100644
index 0000000000000000000000000000000000000000..33a30d89e3cac35133d9e99af9ea54d3a155c76b
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.ff.net.0.proj.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.6.ff.net.2.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.ff.net.2.bias
new file mode 100644
index 0000000000000000000000000000000000000000..0a1a063ad3d3fc621ccb0785ea435e9a465b58bf
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.ff.net.2.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm1.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm1.bias
new file mode 100644
index 0000000000000000000000000000000000000000..50ea8aaa1de8053d43b8654c5d5ecaa3e9a6fba4
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm1.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm1.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm1.weight
new file mode 100644
index 0000000000000000000000000000000000000000..d19b9096aa20f41e2951dbf0a67eaa4de6d387ae
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm1.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm2.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm2.weight
new file mode 100644
index 0000000000000000000000000000000000000000..ac6e0a51862d15861c198a052089a76689a81461
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.6.norm2.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.7.attn1.to_out.0.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.attn1.to_out.0.bias
new file mode 100644
index 0000000000000000000000000000000000000000..6509ffc1ec970fec02ed4d5c0245e13c9eb8cba8
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.attn1.to_out.0.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm1.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm1.bias
new file mode 100644
index 0000000000000000000000000000000000000000..8b0588d4ab92e4bf5c8bc16ee3c8c7d4e05717df
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm1.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm1.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm1.weight
new file mode 100644
index 0000000000000000000000000000000000000000..146e7d3a80c6d2013b20af2872791c5109b0a191
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm1.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm2.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm2.weight
new file mode 100644
index 0000000000000000000000000000000000000000..3bd4e5e3284531543183f48952d82032152f2653
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm2.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm3.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm3.weight
new file mode 100644
index 0000000000000000000000000000000000000000..72d489b6d888b9acce7df55c90b08e5c75f11b60
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.7.norm3.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.8.attn1.to_out.0.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.attn1.to_out.0.bias
new file mode 100644
index 0000000000000000000000000000000000000000..d758763870c3feb4227b8c614d187ab3e3a8507d
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.attn1.to_out.0.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.8.attn2.to_out.0.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.attn2.to_out.0.bias
new file mode 100644
index 0000000000000000000000000000000000000000..1ad5101c40947382da404c67eabe4bb7b8889de9
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.attn2.to_out.0.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm1.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm1.bias
new file mode 100644
index 0000000000000000000000000000000000000000..2380df4857d665e3fe3506320945bf8e2b2654d1
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm1.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm2.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm2.weight
new file mode 100644
index 0000000000000000000000000000000000000000..618187eab8484013fa3757bdedf6bb235a097cdd
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm2.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm3.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm3.bias
new file mode 100644
index 0000000000000000000000000000000000000000..4a67aeb7e0a1b236f7495884ee125dfd3e7ebf76
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm3.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm3.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm3.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6fedf2b8373471feef3bde32b2e66a82a5d5922c
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.8.norm3.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.9.attn1.to_out.0.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.attn1.to_out.0.bias
new file mode 100644
index 0000000000000000000000000000000000000000..42e1e6f3e68a2a59243f65867d52368800db58d3
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.attn1.to_out.0.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.9.attn2.to_out.0.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.attn2.to_out.0.bias
new file mode 100644
index 0000000000000000000000000000000000000000..8135adc1337f782abf0d0801fbe44329195f79c1
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.attn2.to_out.0.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.9.ff.net.0.proj.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.ff.net.0.proj.bias
new file mode 100644
index 0000000000000000000000000000000000000000..003322f9589e33635d865b0db8837a399aa135e6
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.ff.net.0.proj.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm1.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm1.bias
new file mode 100644
index 0000000000000000000000000000000000000000..981c69ccd3af192ab782a6d33127693d9cee3ba5
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm1.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm2.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm2.weight
new file mode 100644
index 0000000000000000000000000000000000000000..4cd76d79c962cb7e1ef3ac3163a833a520a3910c
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm2.weight differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm3.bias b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm3.bias
new file mode 100644
index 0000000000000000000000000000000000000000..16334f71c03fa6aa24cbe79225bd3a377c428d2a
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm3.bias differ
diff --git a/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm3.weight b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm3.weight
new file mode 100644
index 0000000000000000000000000000000000000000..08949c3ee51a6a84106821f49cc3c4776ba9a2d2
Binary files /dev/null and b/onnx/up_blocks.0/attentions.2.transformer_blocks.9.norm3.weight differ
diff --git a/onnx/up_blocks.0/onnx__Add_6908 b/onnx/up_blocks.0/onnx__Add_6908
new file mode 100644
index 0000000000000000000000000000000000000000..2936ef35c70dd3aa99f802621eaeb52c4dcedf97
Binary files /dev/null and b/onnx/up_blocks.0/onnx__Add_6908 differ
diff --git a/onnx/up_blocks.0/onnx__Add_7154 b/onnx/up_blocks.0/onnx__Add_7154
new file mode 100644
index 0000000000000000000000000000000000000000..dd8f84e2b9b6e3cbd885b319ab6d7682b89206e9
Binary files /dev/null and b/onnx/up_blocks.0/onnx__Add_7154 differ
diff --git a/onnx/up_blocks.0/onnx__Add_7400 b/onnx/up_blocks.0/onnx__Add_7400
new file mode 100644
index 0000000000000000000000000000000000000000..5c4c1425cc33c3124e2029c29ac77f5d3b135338
Binary files /dev/null and b/onnx/up_blocks.0/onnx__Add_7400 differ
diff --git a/onnx/up_blocks.0/onnx__Add_7402 b/onnx/up_blocks.0/onnx__Add_7402
new file mode 100644
index 0000000000000000000000000000000000000000..8cff03f0857b754b6a0eb3b37edd0e4a1af9c448
Binary files /dev/null and b/onnx/up_blocks.0/onnx__Add_7402 differ
diff --git a/onnx/up_blocks.0/onnx__Add_7404 b/onnx/up_blocks.0/onnx__Add_7404
new file mode 100644
index 0000000000000000000000000000000000000000..628234a840b23f23dc98bd689754c2485b76cd93
Binary files /dev/null and b/onnx/up_blocks.0/onnx__Add_7404 differ
diff --git a/onnx/up_blocks.0/onnx__Mul_7155 b/onnx/up_blocks.0/onnx__Mul_7155
new file mode 100644
index 0000000000000000000000000000000000000000..819dae504c2f0e43434b17fd68c8ab3a2d65bf60
Binary files /dev/null and b/onnx/up_blocks.0/onnx__Mul_7155 differ
diff --git a/onnx/up_blocks.0/resnets.0.conv2.bias b/onnx/up_blocks.0/resnets.0.conv2.bias
new file mode 100644
index 0000000000000000000000000000000000000000..689a2910b96d69f4a3b62c52e53e8b25cbf82b5e
Binary files /dev/null and b/onnx/up_blocks.0/resnets.0.conv2.bias differ
diff --git a/onnx/up_blocks.0/resnets.0.time_emb_proj.bias b/onnx/up_blocks.0/resnets.0.time_emb_proj.bias
new file mode 100644
index 0000000000000000000000000000000000000000..ae25c67d95fa9a236c17a89393ece17fd6e47243
Binary files /dev/null and b/onnx/up_blocks.0/resnets.0.time_emb_proj.bias differ
diff --git a/onnx/up_blocks.0/resnets.1.conv2.bias b/onnx/up_blocks.0/resnets.1.conv2.bias
new file mode 100644
index 0000000000000000000000000000000000000000..041afb33112c03a524b87c9778bd8692e2b7833f
Binary files /dev/null and b/onnx/up_blocks.0/resnets.1.conv2.bias differ
diff --git a/onnx/up_blocks.0/resnets.1.conv_shortcut.bias b/onnx/up_blocks.0/resnets.1.conv_shortcut.bias
new file mode 100644
index 0000000000000000000000000000000000000000..deab7520a0a1d99efd74bb9cbfce4e3eb9e88ddd
Binary files /dev/null and b/onnx/up_blocks.0/resnets.1.conv_shortcut.bias differ
diff --git a/onnx/up_blocks.0/resnets.2.conv1.bias b/onnx/up_blocks.0/resnets.2.conv1.bias
new file mode 100644
index 0000000000000000000000000000000000000000..111cdfecdd3fc920216c01af061d4b0542ef1437
Binary files /dev/null and b/onnx/up_blocks.0/resnets.2.conv1.bias differ
diff --git a/onnx/up_blocks.0/resnets.2.conv_shortcut.bias b/onnx/up_blocks.0/resnets.2.conv_shortcut.bias
new file mode 100644
index 0000000000000000000000000000000000000000..38d115bc6354443376ebec603a67b060338ea4c7
Binary files /dev/null and b/onnx/up_blocks.0/resnets.2.conv_shortcut.bias differ
diff --git a/onnx/up_blocks.0/resnets.2.time_emb_proj.bias b/onnx/up_blocks.0/resnets.2.time_emb_proj.bias
new file mode 100644
index 0000000000000000000000000000000000000000..1b1643c8ec1a850b5626b87250d9ca553033852f
Binary files /dev/null and b/onnx/up_blocks.0/resnets.2.time_emb_proj.bias differ
diff --git a/onnx/up_blocks.0/upsamplers.0.conv.bias b/onnx/up_blocks.0/upsamplers.0.conv.bias
new file mode 100644
index 0000000000000000000000000000000000000000..a11abe507831e52cb30788627683439c0013cdf5
Binary files /dev/null and b/onnx/up_blocks.0/upsamplers.0.conv.bias differ
diff --git a/src/assets/sdxl_cache.png b/src/assets/sdxl_cache.png
new file mode 100644
index 0000000000000000000000000000000000000000..cc40b7f0331215eff229d3c5f8eb16607f587bdf
Binary files /dev/null and b/src/assets/sdxl_cache.png differ
diff --git a/src/cache_diffusion/cachify.py b/src/cache_diffusion/cachify.py
new file mode 100644
index 0000000000000000000000000000000000000000..df5b3efbb222c69984c78f75ea84a6a3fb04dd16
--- /dev/null
+++ b/src/cache_diffusion/cachify.py
@@ -0,0 +1,144 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import fnmatch
+from contextlib import contextmanager
+
+from diffusers.models.attention import BasicTransformerBlock, JointTransformerBlock
+from diffusers.models.transformers.pixart_transformer_2d import PixArtTransformer2DModel
+from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
+from diffusers.models.unets.unet_2d_blocks import (
+    CrossAttnDownBlock2D,
+    CrossAttnUpBlock2D,
+    DownBlock2D,
+    UNetMidBlock2DCrossAttn,
+    UpBlock2D,
+)
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from diffusers.models.unets.unet_3d_blocks import (
+    CrossAttnDownBlockSpatioTemporal,
+    CrossAttnUpBlockSpatioTemporal,
+    DownBlockSpatioTemporal,
+    UNetMidBlockSpatioTemporal,
+    UpBlockSpatioTemporal,
+)
+from diffusers.models.unets.unet_spatio_temporal_condition import UNetSpatioTemporalConditionModel
+
+from .module import CachedModule
+from .utils import replace_module
+
+CACHED_PIPE = {
+    UNet2DConditionModel: (
+        DownBlock2D,
+        CrossAttnDownBlock2D,
+        UNetMidBlock2DCrossAttn,
+        CrossAttnUpBlock2D,
+        UpBlock2D,
+    ),
+    PixArtTransformer2DModel: (BasicTransformerBlock),
+    UNetSpatioTemporalConditionModel: (
+        CrossAttnDownBlockSpatioTemporal,
+        DownBlockSpatioTemporal,
+        UpBlockSpatioTemporal,
+        CrossAttnUpBlockSpatioTemporal,
+        UNetMidBlockSpatioTemporal,
+    ),
+    SD3Transformer2DModel: (JointTransformerBlock),
+}
+
+
+def _apply_to_modules(model, action, modules=None, config_list=None):
+    if hasattr(model, "use_trt_infer") and model.use_trt_infer:
+        for key, module in model.engines.items():
+            if isinstance(module, CachedModule):
+                action(module)
+            elif config_list:
+                for config in config_list:
+                    if _pass(key, config["wildcard_or_filter_func"]):
+                        model.engines[key] = CachedModule(module, config["select_cache_step_func"])
+    else:
+        for name, module in model.named_modules():
+            if isinstance(module, CachedModule):
+                action(module)
+            elif modules and config_list:
+                for config in config_list:
+                    if _pass(name, config["wildcard_or_filter_func"]) and isinstance(
+                        module, modules
+                    ):
+                        replace_module(
+                            model,
+                            name,
+                            CachedModule(module, config["select_cache_step_func"]),
+                        )
+
+
+def cachify(model, config_list, modules):
+    def cache_action(module):
+        pass  # No action needed, caching is handled in the loop itself
+
+    _apply_to_modules(model, cache_action, modules, config_list)
+
+
+def disable(pipe):
+    model = get_model(pipe)
+    _apply_to_modules(model, lambda module: module.disable_cache())
+
+
+def enable(pipe):
+    model = get_model(pipe)
+    _apply_to_modules(model, lambda module: module.enable_cache())
+
+
+def reset_status(pipe):
+    model = get_model(pipe)
+    _apply_to_modules(model, lambda module: setattr(module, "cur_step", 0))
+
+
+def _pass(name, wildcard_or_filter_func):
+    if isinstance(wildcard_or_filter_func, str):
+        return fnmatch.fnmatch(name, wildcard_or_filter_func)
+    elif callable(wildcard_or_filter_func):
+        return wildcard_or_filter_func(name)
+    else:
+        raise NotImplementedError(f"Unsupported type {type(wildcard_or_filter_func)}")
+
+
+def get_model(pipe):
+    if hasattr(pipe, "unet"):
+        return pipe.unet
+    elif hasattr(pipe, "transformer"):
+        return pipe.transformer
+    else:
+        raise KeyError
+
+
+@contextmanager
+def infer(pipe):
+    try:
+        yield pipe
+    finally:
+        reset_status(pipe)
+
+
+def prepare(pipe, config_list):
+    model = get_model(pipe)
+    assert model.__class__ in CACHED_PIPE.keys(), f"{model.__class__} is not supported!"
+    cachify(model, config_list, CACHED_PIPE[model.__class__])
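For orientation, here is a minimal usage sketch of the API above. Only `prepare` and `infer` come from `src/cache_diffusion/cachify.py`; the pipeline setup, model id, and step count are illustrative assumptions, not part of this diff.

# Hypothetical usage of cache_diffusion with an SDXL pipeline (sketch only).
import torch
from diffusers import StableDiffusionXLPipeline

from cache_diffusion import cachify  # import path assumed from src/cache_diffusion/
from cache_diffusion.utils import SDXL_DEFAULT_CONFIG

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

cachify.prepare(pipe, SDXL_DEFAULT_CONFIG)  # wraps cacheable UNet blocks in CachedModule

with cachify.infer(pipe) as cached_pipe:  # resets per-block step counters on exit
    image = cached_pipe("a photo of a cat", num_inference_steps=20).images[0]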
diff --git a/src/cache_diffusion/module.py b/src/cache_diffusion/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1ed434c30fd1ab1feeeafc6addeb73dd04655c4
--- /dev/null
+++ b/src/cache_diffusion/module.py
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+from torch import nn
+
+
+class CachedModule(nn.Module):
+    def __init__(self, block, select_cache_step_func) -> None:
+        super().__init__()
+        self.block = block
+        self.select_cache_step_func = select_cache_step_func
+        self.cur_step = 0
+        self.cached_results = None
+        self.enabled = True
+
+    def __getattr__(self, name):
+        try:
+            return super().__getattr__(name)
+        except AttributeError:
+            return getattr(self.block, name)
+
+    def if_cache(self):
+        return self.select_cache_step_func(self.cur_step) and self.enabled
+
+    def enable_cache(self):
+        self.enabled = True
+
+    def disable_cache(self):
+        self.enabled = False
+        self.cur_step = 0
+
+    def forward(self, *args, **kwargs):
+        if not self.if_cache():
+            self.cached_results = self.block(*args, **kwargs)
+        if self.enabled:
+            self.cur_step += 1
+        return self.cached_results
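The caching contract of `CachedModule` is easiest to see on a toy block (a sketch; the import path is assumed from this diff): on steps where `select_cache_step_func` returns False the wrapped block is recomputed and stored, and on steps where it returns True the stored result is returned unchanged.

# Toy demonstration of CachedModule's step logic (illustrative, standalone).
import torch
from torch import nn

from cache_diffusion.module import CachedModule

block = nn.Linear(4, 4)
# Same policy as SDXL_DEFAULT_CONFIG: recompute on even steps, reuse on odd steps.
cached = CachedModule(block, lambda step: (step % 2) != 0)

x = torch.randn(1, 4)
y0 = cached(x)  # step 0: if_cache() is False, so the block runs and the result is stored
y1 = cached(x)  # step 1: if_cache() is True, so the stored step-0 result is returned
assert torch.equal(y0, y1)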
diff --git a/src/cache_diffusion/utils.py b/src/cache_diffusion/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8b2c27726367f7961e55f215ca6f064353dc8bd
--- /dev/null
+++ b/src/cache_diffusion/utils.py
@@ -0,0 +1,61 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import re
+
+SDXL_DEFAULT_CONFIG = [
+    {
+        "wildcard_or_filter_func": lambda name: "up_blocks.2" not in name,
+        "select_cache_step_func": lambda step: (step % 2) != 0,
+    }
+]
+
+PIXART_DEFAULT_CONFIG = [
+    {
+        "wildcard_or_filter_func": lambda name: not re.search(
+            r"transformer_blocks\.(2[1-7])\.", name
+        ),
+        "select_cache_step_func": lambda step: (step % 3) != 0,
+    }
+]
+
+SVD_DEFAULT_CONFIG = [
+    {
+        "wildcard_or_filter_func": lambda name: "up_blocks.3" not in name,
+        "select_cache_step_func": lambda step: (step % 2) != 0,
+    }
+]
+
+SD3_DEFAULT_CONFIG = [
+    {
+        "wildcard_or_filter_func": lambda name: re.search(
+            r"^((?!transformer_blocks\.(1[6-9]|2[0-3])).)*$", name
+        ),
+        "select_cache_step_func": lambda step: (step % 2) != 0,
+    }
+]
+
+
+def replace_module(parent, name_path, new_module):
+    path_parts = name_path.split(".")
+    for part in path_parts[:-1]:
+        parent = getattr(parent, part)
+    setattr(parent, path_parts[-1], new_module)
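A quick sanity check of how a config entry drives selection (the asserts follow directly from the lambdas above): the SDXL filter caches everything except `up_blocks.2`, the final, highest-resolution up block, and recomputes on even steps.

# Sanity check of SDXL_DEFAULT_CONFIG's two callables (runs as-is).
from cache_diffusion.utils import SDXL_DEFAULT_CONFIG

cfg = SDXL_DEFAULT_CONFIG[0]
# Module filter: every block except the final up block is cacheable.
assert cfg["wildcard_or_filter_func"]("up_blocks.0")
assert not cfg["wildcard_or_filter_func"]("up_blocks.2")
# Step policy: recompute on even steps, serve cached results on odd steps.
assert [cfg["select_cache_step_func"](s) for s in range(4)] == [False, True, False, True]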
diff --git a/src/loss.py b/src/loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..24a18c6efc382f39f458948bdd16d2c7ea972713
--- /dev/null
+++ b/src/loss.py
@@ -0,0 +1,45 @@
+_A=None
+import torch
+from tqdm import tqdm
+class LossSchedulerModel(torch.nn.Module):
+ def __init__(A,wx,we):super(LossSchedulerModel,A).__init__();assert len(wx.shape)==1 and len(we.shape)==2;B=wx.shape[0];assert B==we.shape[0]and B==we.shape[1];A.register_parameter('wx',torch.nn.Parameter(wx));A.register_parameter('we',torch.nn.Parameter(we))
+ def forward(A,t,xT,e_prev):
+  B=e_prev;assert t-len(B)+1==0;C=xT*A.wx[t]
+  for(D,E)in zip(B,A.we[t]):C+=D*E
+  return C.to(xT.dtype)
+class LossScheduler:
+ def __init__(A,timesteps,model):A.timesteps=timesteps;A.model=model;A.init_noise_sigma=1.;A.order=1
+ @staticmethod
+ def load(path):A,B,C=torch.load(path,map_location='cpu');D=LossSchedulerModel(B,C);return LossScheduler(A,D)
+ def save(A,path):B,C,D=A.timesteps,A.model.wx,A.model.we;torch.save((B,C,D),path)
+ def set_timesteps(A,num_inference_steps,device='cuda'):B=device;A.xT=_A;A.e_prev=[];A.t_prev=-1;A.model=A.model.to(B);A.timesteps=A.timesteps.to(B)
+ def scale_model_input(A,sample,*B,**C):return sample
+ @torch.no_grad()
+ def step(self,model_output,timestep,sample,*D,**E):
+  A=self;B=A.timesteps.tolist().index(timestep);assert A.t_prev==-1 or B==A.t_prev+1
+  if A.t_prev==-1:A.xT=sample
+  A.e_prev.append(model_output);C=A.model(B,A.xT,A.e_prev)
+  if B+1==len(A.timesteps):A.xT=_A;A.e_prev=[];A.t_prev=-1
+  else:A.t_prev=B
+  return C,
+class SchedulerWrapper:
+ def __init__(A,scheduler,loss_params_path='loss_params.pth'):A.scheduler=scheduler;A.catch_x,A.catch_e,A.catch_x_={},{},{};A.loss_scheduler=_A;A.loss_params_path=loss_params_path
+ def set_timesteps(A,num_inference_steps,**C):
+  D=num_inference_steps
+  if A.loss_scheduler is _A:B=A.scheduler.set_timesteps(D,**C);A.timesteps=A.scheduler.timesteps;A.init_noise_sigma=A.scheduler.init_noise_sigma;A.order=A.scheduler.order;return B
+  else:B=A.loss_scheduler.set_timesteps(D,**C);A.timesteps=A.loss_scheduler.timesteps;A.init_noise_sigma=A.scheduler.init_noise_sigma;A.order=A.scheduler.order;return B
+ def step(B,model_output,timestep,sample,**F):
+  D=sample;E=model_output;A=timestep
+  if B.loss_scheduler is _A:
+   C=B.scheduler.step(E,A,D,**F);A=A.tolist()
+   if A not in B.catch_x:B.catch_x[A]=[];B.catch_e[A]=[];B.catch_x_[A]=[]
+   B.catch_x[A].append(D.clone().detach().cpu());B.catch_e[A].append(E.clone().detach().cpu());B.catch_x_[A].append(C[0].clone().detach().cpu());return C
+  else:C=B.loss_scheduler.step(E,A,D,**F);return C
+ def scale_model_input(A,sample,timestep):return sample
+ def add_noise(A,original_samples,noise,timesteps):B=A.scheduler.add_noise(original_samples,noise,timesteps);return B
+ def get_path(C):
+  A=sorted([A for A in C.catch_x],reverse=True);B,D=[],[]
+  for E in A:F=torch.cat(C.catch_x[E],dim=0);B.append(F);G=torch.cat(C.catch_e[E],dim=0);D.append(G)
+  H=A[-1];I=torch.cat(C.catch_x_[H],dim=0);B.append(I);A=torch.tensor(A,dtype=torch.int32);B=torch.stack(B);D=torch.stack(D);return A,B,D
+ def load_loss_params(A):B,C,D=torch.load(A.loss_params_path,map_location='cpu');A.loss_model=LossSchedulerModel(C,D);A.loss_scheduler=LossScheduler(B,A.loss_model)
+ def prepare_loss(A,num_accelerate_steps=15):A.load_loss_params()
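The file above is minified; a readable summary and assumed usage follow. Only `SchedulerWrapper`, `prepare_loss`, and the update rule come from the code above; `pipe` and the prompt are illustrative assumptions. The learned scheduler predicts each next latent as a linear combination of the initial noise and all previous model outputs, with per-step weights `wx` and `we` loaded from `loss_params.pth`.

# Sketch of how the wrapper appears to be used (pipeline setup assumed).
from loss import SchedulerWrapper  # src/loss.py in this diff

pipe.scheduler = SchedulerWrapper(pipe.scheduler, loss_params_path="loss_params.pth")
pipe.scheduler.prepare_loss()  # load (timesteps, wx, we); switch to LossScheduler

# Each LossScheduler.step() then computes, per LossSchedulerModel.forward:
#     x_next = wx[t] * xT + sum_i we[t][i] * e_prev[i]
# where xT is the initial latent and e_prev are all model outputs so far.
image = pipe(prompt).images[0]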
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..95720d31c57fddcdf5318dff60cb0ea106c7037b
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,59 @@
+import atexit
+from io import BytesIO
+from multiprocessing.connection import Listener
+from os import chmod, remove
+from os.path import abspath, exists
+from pathlib import Path
+
+import torch
+
+from PIL.JpegImagePlugin import JpegImageFile
+from pipelines.models import TextToImageRequest
+
+from pipeline import load_pipeline, infer
+
+SOCKET = abspath(Path(__file__).parent.parent / "inferences.sock")
+
+
+def at_exit():
+    torch.cuda.empty_cache()
+
+
+def main():
+    atexit.register(at_exit)
+
+    print("Loading pipeline")
+    pipeline = load_pipeline()
+
+    print(f"Pipeline loaded, creating socket at '{SOCKET}'")
+
+    if exists(SOCKET):
+        remove(SOCKET)
+
+    with Listener(SOCKET) as listener:
+        chmod(SOCKET, 0o777)
+
+        print("Awaiting connections")
+        with listener.accept() as connection:
+            print("Connected")
+
+            while True:
+                try:
+                    request = TextToImageRequest.model_validate_json(connection.recv_bytes().decode("utf-8"))
+                except EOFError:
+                    print("Inference socket exiting")
+
+                    return
+
+                image = infer(request, pipeline)
+
+                data = BytesIO()
+                image.save(data, format=JpegImageFile.format)
+
+                packet = data.getvalue()
+
+                connection.send_bytes(packet)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/pipe/config.py b/src/pipe/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..3be8b352611cccbf36fa426e5fafb33533d41edf
--- /dev/null
+++ b/src/pipe/config.py
@@ -0,0 +1,162 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+
+sd3_common_transformer_block_config = {
+    "dummy_input": {
+        "hidden_states": (2, 4096, 1536),
+        "encoder_hidden_states": (2, 333, 1536),
+        "temb": (2, 1536),
+    },
+    "output_names": ["encoder_hidden_states_out", "hidden_states_out"],
+    "dynamic_axes": {
+        "hidden_states": {0: "batch_size"},
+        "encoder_hidden_states": {0: "batch_size"},
+        "temb": {0: "steps"},
+    },
+}
+
+ONNX_CONFIG = {
+    UNet2DConditionModel: {
+        "down_blocks.0": {
+            "dummy_input": {
+                "hidden_states": (2, 320, 128, 128),
+                "temb": (2, 1280),
+            },
+            "output_names": ["sample", "res_samples_0", "res_samples_1", "res_samples_2"],
+            "dynamic_axes": {
+                "hidden_states": {0: "batch_size"},
+                "temb": {0: "steps"},
+            },
+        },
+        "down_blocks.1": {
+            "dummy_input": {
+                "hidden_states": (2, 320, 64, 64),
+                "temb": (2, 1280),
+                "encoder_hidden_states": (2, 77, 2048),
+            },
+            "output_names": ["sample", "res_samples_0", "res_samples_1", "res_samples_2"],
+            "dynamic_axes": {
+                "hidden_states": {0: "batch_size"},
+                "temb": {0: "steps"},
+                "encoder_hidden_states": {0: "batch_size"},
+            },
+        },
+        "down_blocks.2": {
+            "dummy_input": {
+                "hidden_states": (2, 640, 32, 32),
+                "temb": (2, 1280),
+                "encoder_hidden_states": (2, 77, 2048),
+            },
+            "output_names": ["sample", "res_samples_0", "res_samples_1"],
+            "dynamic_axes": {
+                "hidden_states": {0: "batch_size"},
+                "temb": {0: "steps"},
+                "encoder_hidden_states": {0: "batch_size"},
+            },
+        },
+        "mid_block": {
+            "dummy_input": {
+                "hidden_states": (2, 1280, 32, 32),
+                "temb": (2, 1280),
+                "encoder_hidden_states": (2, 77, 2048),
+            },
+            "output_names": ["sample"],
+            "dynamic_axes": {
+                "hidden_states": {0: "batch_size"},
+                "temb": {0: "steps"},
+                "encoder_hidden_states": {0: "batch_size"},
+            },
+        },
+        "up_blocks.0": {
+            "dummy_input": {
+                "hidden_states": (2, 1280, 32, 32),
+                "res_hidden_states_0": (2, 640, 32, 32),
+                "res_hidden_states_1": (2, 1280, 32, 32),
+                "res_hidden_states_2": (2, 1280, 32, 32),
+                "temb": (2, 1280),
+                "encoder_hidden_states": (2, 77, 2048),
+            },
+            "output_names": ["sample"],
+            "dynamic_axes": {
+                "hidden_states": {0: "batch_size"},
+                "temb": {0: "steps"},
+                "encoder_hidden_states": {0: "batch_size"},
+                "res_hidden_states_0": {0: "batch_size"},
+                "res_hidden_states_1": {0: "batch_size"},
+                "res_hidden_states_2": {0: "batch_size"},
+            },
+        },
+        "up_blocks.1": {
+            "dummy_input": {
+                "hidden_states": (2, 1280, 64, 64),
+                "res_hidden_states_0": (2, 320, 64, 64),
+                "res_hidden_states_1": (2, 640, 64, 64),
+                "res_hidden_states_2": (2, 640, 64, 64),
+                "temb": (2, 1280),
+                "encoder_hidden_states": (2, 77, 2048),
+            },
+            "output_names": ["sample"],
+            "dynamic_axes": {
+                "hidden_states": {0: "batch_size"},
+                "temb": {0: "steps"},
+                "encoder_hidden_states": {0: "batch_size"},
+                "res_hidden_states_0": {0: "batch_size"},
+                "res_hidden_states_1": {0: "batch_size"},
+                "res_hidden_states_2": {0: "batch_size"},
+            },
+        },
+        "up_blocks.2": {
+            "dummy_input": {
+                "hidden_states": (2, 640, 128, 128),
+                "res_hidden_states_0": (2, 320, 128, 128),
+                "res_hidden_states_1": (2, 320, 128, 128),
+                "res_hidden_states_2": (2, 320, 128, 128),
+                "temb": (2, 1280),
+            },
+            "output_names": ["sample"],
+            "dynamic_axes": {
+                "hidden_states": {0: "batch_size"},
+                "temb": {0: "steps"},
+                "res_hidden_states_0": {0: "batch_size"},
+                "res_hidden_states_1": {0: "batch_size"},
+                "res_hidden_states_2": {0: "batch_size"},
+            },
+        },
+    },
+    SD3Transformer2DModel: {
+        **{f"transformer_blocks.{i}": sd3_common_transformer_block_config for i in range(23)},
+        "transformer_blocks.23": {
+            "dummy_input": {
+                "hidden_states": (2, 4096, 1536),
+                "encoder_hidden_states": (2, 333, 1536),
+                "temb": (2, 1536),
+            },
+            "output_names": ["hidden_states_out"],
+            "dynamic_axes": {
+                "hidden_states": {0: "batch_size"},
+                "encoder_hidden_states": {0: "batch_size"},
+                "temb": {0: "steps"},
+            },
+        },
+    },
+}
diff --git a/src/pipe/deploy.py b/src/pipe/deploy.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3d0b667dafa3e5a807beeb391efb6f7657dc616
--- /dev/null
+++ b/src/pipe/deploy.py
@@ -0,0 +1,210 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import types
+from pathlib import Path
+
+import tensorrt as trt
+import torch
+from cache_diffusion.cachify import CACHED_PIPE, get_model
+from cuda import cudart
+from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from pipe.config import ONNX_CONFIG
+from pipe.models.sd3 import sd3_forward
+from pipe.models.sdxl import (
+    cachecrossattnupblock2d_forward,
+    cacheunet_forward,
+    cacheupblock2d_forward,
+)
+from polygraphy.backend.trt import (
+    CreateConfig,
+    Profile,
+    engine_from_network,
+    network_from_onnx_path,
+    save_engine,
+)
+from torch.onnx import export as onnx_export
+
+from .utils import Engine
+
+
+def replace_new_forward(backbone):
+    if backbone.__class__ == UNet2DConditionModel:
+        backbone.forward = types.MethodType(cacheunet_forward, backbone)
+        for upsample_block in backbone.up_blocks:
+            if (
+                hasattr(upsample_block, "has_cross_attention")
+                and upsample_block.has_cross_attention
+            ):
+                upsample_block.forward = types.MethodType(
+                    cachecrossattnupblock2d_forward, upsample_block
+                )
+            else:
+                upsample_block.forward = types.MethodType(cacheupblock2d_forward, upsample_block)
+    elif backbone.__class__ == SD3Transformer2DModel:
+        backbone.forward = types.MethodType(sd3_forward, backbone)
+
+
+def get_input_info(dummy_dict, info: str = None, batch_size: int = 1):
+    return_val = [] if info == "profile_shapes" or info == "input_names" else {}
+
+    def collect_leaf_keys(d):
+        for key, value in d.items():
+            if isinstance(value, dict):
+                collect_leaf_keys(value)
+            else:
+                value = (value[0] * batch_size,) + value[1:]
+                if info == "profile_shapes":
+                    return_val.append((key, value))  # type: ignore
+                elif info == "profile_shapes_dict":
+                    return_val[key] = value  # type: ignore
+                elif info == "dummy_input":
+                    return_val[key] = torch.ones(value).half().cuda()  # type: ignore
+                elif info == "input_names":
+                    return_val.append(key)  # type: ignore
+
+    collect_leaf_keys(dummy_dict)
+    return return_val
+
+
+def compile2trt(cls, onnx_path: Path, engine_path: Path, batch_size: int = 1):
+    subdirs = [f for f in onnx_path.iterdir() if f.is_dir()]
+    for subdir in subdirs:
+        if subdir.name not in ONNX_CONFIG[cls].keys():
+            continue
+        model_path = subdir / "model.onnx"
+        plan_path = engine_path / f"{subdir.name}.plan"
+        if not plan_path.exists():
+            print(f"Building {str(model_path)}")
+            build_profile = Profile()
+            profile_shapes = get_input_info(
+                ONNX_CONFIG[cls][subdir.name]["dummy_input"], "profile_shapes", batch_size
+            )
+            for input_name, input_shape in profile_shapes:
+                min_input_shape = (2,) + input_shape[1:]
+                build_profile.add(input_name, min_input_shape, input_shape, input_shape)
+            block_network = network_from_onnx_path(
+                str(model_path), flags=[trt.OnnxParserFlag.NATIVE_INSTANCENORM], strongly_typed=True
+            )
+            build_config = CreateConfig(
+                builder_optimization_level=6,
+                tf32=True,
+                #bf16=True,
+                profiles=[build_profile],
+            )
+            engine = engine_from_network(
+                block_network,
+                config=build_config,
+            )
+            save_engine(engine, path=plan_path)
+        else:
+            print(f"{str(model_path)} already exists!")
+
+
+def get_total_device_memory(backbone):
+    max_device_memory = 0
+    for _, engine in backbone.engines.items():
+        max_device_memory = max(max_device_memory, engine.engine.device_memory_size)
+    return max_device_memory
+
+
+def load_engines(backbone, engine_path: Path, batch_size: int = 1):
+    backbone.engines = {}
+    for f in engine_path.iterdir():
+        if f.is_file():
+            eng = Engine()
+            eng.load(str(f))
+            backbone.engines[f"{f.stem}"] = eng
+    _, shared_device_memory = cudart.cudaMalloc(get_total_device_memory(backbone))
+    for engine in backbone.engines.values():
+        engine.activate(shared_device_memory)
+    backbone.cuda_stream = cudart.cudaStreamCreate()[1]
+    for block_name in backbone.engines.keys():
+        backbone.engines[block_name].allocate_buffers(
+            shape_dict=get_input_info(
+                ONNX_CONFIG[backbone.__class__][block_name]["dummy_input"],
+                "profile_shapes_dict",
+                batch_size,
+            ),
+            device=backbone.device,
+            batch_size=batch_size,
+        )
+    # TODO: Free and clean up the original PyTorch CUDA memory
+
+
+def export_onnx(backbone, onnx_path: Path):
+    for name, module in backbone.named_modules():
+        if isinstance(module, CACHED_PIPE[backbone.__class__]):
+            _onnx_dir = onnx_path.joinpath(f"{name}")
+            _onnx_file = _onnx_dir.joinpath("model.onnx")
+            if not _onnx_file.exists():
+                _onnx_dir.mkdir(parents=True, exist_ok=True)
+                dummy_input = get_input_info(
+                    ONNX_CONFIG[backbone.__class__][f"{name}"]["dummy_input"], "dummy_input"
+                )
+                input_names = get_input_info(
+                    ONNX_CONFIG[backbone.__class__][f"{name}"]["dummy_input"], "input_names"
+                )
+                output_names = ONNX_CONFIG[backbone.__class__][f"{name}"]["output_names"]
+                onnx_export(
+                    module,
+                    args=dummy_input,
+                    f=_onnx_file.as_posix(),
+                    input_names=input_names,
+                    output_names=output_names,
+                    dynamic_axes=ONNX_CONFIG[backbone.__class__][f"{name}"]["dynamic_axes"],
+                    do_constant_folding=True,
+                    opset_version=17,
+                )
+            else:
+                print(f"{str(_onnx_file)} already exists!")
+
+
+def warm_up(backbone, batch_size: int = 1):
+    print("Warming-up TensorRT engines...")
+    for name, engine in backbone.engines.items():
+        dummy_input = get_input_info(
+            ONNX_CONFIG[backbone.__class__][name]["dummy_input"], "dummy_input", batch_size
+        )
+        _ = engine(dummy_input, backbone.cuda_stream)
+
+
+def teardown(pipe):
+    backbone = get_model(pipe)
+    for engine in backbone.engines.values():
+        del engine
+
+    cudart.cudaStreamDestroy(backbone.cuda_stream)
+    del backbone.cuda_stream
+
+
+def compile(pipe, onnx_path: Path, engine_path: Path, batch_size: int = 1):
+    backbone = get_model(pipe)
+    onnx_path.mkdir(parents=True, exist_ok=True)
+    engine_path.mkdir(parents=True, exist_ok=True)
+
+    replace_new_forward(backbone)
+    export_onnx(backbone, onnx_path)
+    compile2trt(backbone.__class__, onnx_path, engine_path, batch_size)
+    load_engines(backbone, engine_path, batch_size)
+    warm_up(backbone, batch_size)
+    backbone.use_trt_infer = True
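An assumed end-to-end flow for the TensorRT path (a sketch: paths are illustrative, and the `compile`-before-`prepare` ordering is inferred from the `use_trt_infer` branch in `cachify._apply_to_modules`, which wraps loaded engines rather than PyTorch modules):

# Sketch: build and use the per-block TensorRT engines (paths illustrative).
from pathlib import Path

from cache_diffusion import cachify
from cache_diffusion.utils import SDXL_DEFAULT_CONFIG
from pipe.deploy import compile, teardown

compile(pipe, Path("./onnx"), Path("./engine"), batch_size=1)  # export, build, load, warm up
cachify.prepare(pipe, SDXL_DEFAULT_CONFIG)  # wrap the loaded engines in CachedModule
# ... run inference as usual; cached blocks skip their engine on cache steps ...
teardown(pipe)  # release engines and destroy the CUDA stream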
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +from typing import Any, Dict, List, Optional, Union + +import torch +from diffusers.models.modeling_outputs import Transformer2DModelOutput +from diffusers.utils import ( + USE_PEFT_BACKEND, + is_torch_version, + scale_lora_layers, + unscale_lora_layers, +) + + +def sd3_forward( + self, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + pooled_projections: torch.FloatTensor = None, + timestep: torch.LongTensor = None, + block_controlnet_hidden_states: List = None, + joint_attention_kwargs: Optional[Dict[str, Any]] = None, + return_dict: bool = True, +) -> Union[torch.FloatTensor, Transformer2DModelOutput]: + """ + The [`SD3Transformer2DModel`] forward method. + + Args: + hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected + from the embeddings of input conditions. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + block_controlnet_hidden_states: (`list` of `torch.Tensor`): + A list of tensors that if specified are added to the residuals of transformer blocks. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + if joint_attention_kwargs is not None: + joint_attention_kwargs = joint_attention_kwargs.copy() + lora_scale = joint_attention_kwargs.pop("scale", 1.0) + else: + lora_scale = 1.0 + + if USE_PEFT_BACKEND: + # weight the lora layers by setting `lora_scale` for each PEFT layer + scale_lora_layers(self, lora_scale) + + height, width = hidden_states.shape[-2:] + + hidden_states = self.pos_embed(hidden_states) # takes care of adding positional embeddings too. 
+    temb = self.time_text_embed(timestep, pooled_projections)
+    encoder_hidden_states = self.context_embedder(encoder_hidden_states)
+
+    for index_block, block in enumerate(self.transformer_blocks):
+        if self.training and self.gradient_checkpointing:
+
+            def create_custom_forward(module, return_dict=None):
+                def custom_forward(*inputs):
+                    if return_dict is not None:
+                        return module(*inputs, return_dict=return_dict)
+                    else:
+                        return module(*inputs)
+
+                return custom_forward
+
+            ckpt_kwargs: Dict[str, Any] = (
+                {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
+            )
+            encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
+                create_custom_forward(block),
+                hidden_states,
+                encoder_hidden_states,
+                temb,
+                **ckpt_kwargs,
+            )
+
+        else:
+            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
+                feed_dict = {
+                    "hidden_states": hidden_states,
+                    "encoder_hidden_states": encoder_hidden_states,
+                    "temb": temb,
+                }
+                _results = self.engines[f"transformer_blocks.{index_block}"](
+                    feed_dict, self.cuda_stream
+                )
+                # The final block (context_pre_only) emits no
+                # encoder_hidden_states output, so skip reading it there.
+                if index_block != len(self.transformer_blocks) - 1:
+                    encoder_hidden_states = _results["encoder_hidden_states_out"]
+                hidden_states = _results["hidden_states_out"]
+            else:
+                encoder_hidden_states, hidden_states = block(
+                    hidden_states=hidden_states,
+                    encoder_hidden_states=encoder_hidden_states,
+                    temb=temb,
+                )
+
+        # controlnet residual
+        if block_controlnet_hidden_states is not None and block.context_pre_only is False:
+            interval_control = len(self.transformer_blocks) // len(block_controlnet_hidden_states)
+            hidden_states = (
+                hidden_states + block_controlnet_hidden_states[index_block // interval_control]
+            )
+
+    hidden_states = self.norm_out(hidden_states, temb)
+    hidden_states = self.proj_out(hidden_states)
+
+    # unpatchify
+    patch_size = self.config.patch_size
+    height = height // patch_size
+    width = width // patch_size
+
+    hidden_states = hidden_states.reshape(
+        shape=(hidden_states.shape[0], height, width, patch_size, patch_size, self.out_channels)
+    )
+    hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states)
+    output = hidden_states.reshape(
+        shape=(hidden_states.shape[0], self.out_channels, height * patch_size, width * patch_size)
+    )
+
+    if USE_PEFT_BACKEND:
+        # remove `lora_scale` from each PEFT layer
+        unscale_lora_layers(self, lora_scale)
+
+    if not return_dict:
+        return (output,)
+
+    return Transformer2DModelOutput(sample=output)
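# ---------------------------------------------------------------------------
# Minimal standalone sketch of the unpatchify step above. The sizes are made
# up for illustration; the einsum string is the one used in sd3_forward.
# ---------------------------------------------------------------------------
import torch

B, H, W, p, C = 1, 2, 3, 2, 4                # tiny illustrative sizes
tokens = torch.randn(B, H * W, p * p * C)    # (B, num_patches, p*p*C)
x = tokens.reshape(B, H, W, p, p, C)         # split patch grid and patch pixels
x = torch.einsum("nhwpqc->nchpwq", x)        # (B, C, H, p, W, q)
image = x.reshape(B, C, H * p, W * p)        # stitch patches back into an image
assert image.shape == (1, 4, 4, 6)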
diff --git a/src/pipe/models/sdxl.py b/src/pipe/models/sdxl.py
new file mode 100644
index 0000000000000000000000000000000000000000..92aaece6ab8a3313be41825c48d4c2109f3d1df7
--- /dev/null
+++ b/src/pipe/models/sdxl.py
@@ -0,0 +1,275 @@
+# Adapted from
+# https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_condition.py#L1039-L1312
+# https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_blocks.py#L2482-L2564
+# https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_blocks.py#L2617-L2679
+
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Not a contribution
+# Changes made by NVIDIA CORPORATION & AFFILIATES or otherwise documented as
+# NVIDIA-proprietary are not a contribution and subject to the following terms and conditions:
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+from typing import Any, Dict, Optional, Tuple, Union
+
+import torch
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput
+
+
+def cachecrossattnupblock2d_forward(
+    self,
+    hidden_states: torch.FloatTensor,
+    res_hidden_states_0: torch.FloatTensor,
+    res_hidden_states_1: torch.FloatTensor,
+    res_hidden_states_2: torch.FloatTensor,
+    temb: Optional[torch.FloatTensor] = None,
+    encoder_hidden_states: Optional[torch.FloatTensor] = None,
+    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+    upsample_size: Optional[int] = None,
+    attention_mask: Optional[torch.FloatTensor] = None,
+    encoder_attention_mask: Optional[torch.FloatTensor] = None,
+) -> torch.FloatTensor:
+    res_hidden_states_tuple = (res_hidden_states_0, res_hidden_states_1, res_hidden_states_2)
+    for resnet, attn in zip(self.resnets, self.attentions):
+        # pop res hidden states
+        res_hidden_states = res_hidden_states_tuple[-1]
+        res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+
+        hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+        hidden_states = resnet(hidden_states, temb)
+        hidden_states = attn(
+            hidden_states,
+            encoder_hidden_states=encoder_hidden_states,
+            cross_attention_kwargs=cross_attention_kwargs,
+            attention_mask=attention_mask,
+            encoder_attention_mask=encoder_attention_mask,
+            return_dict=False,
+        )[0]
+
+    if self.upsamplers is not None:
+        for upsampler in self.upsamplers:
+            hidden_states = upsampler(hidden_states, upsample_size)
+
+    return hidden_states
+
+
+def cacheupblock2d_forward(
+    self,
+    hidden_states: torch.FloatTensor,
+    res_hidden_states_0: torch.FloatTensor,
+    res_hidden_states_1: torch.FloatTensor,
+    res_hidden_states_2: torch.FloatTensor,
+    temb: Optional[torch.FloatTensor] = None,
+    upsample_size: Optional[int] = None,
+) -> torch.FloatTensor:
+    res_hidden_states_tuple = (res_hidden_states_0, res_hidden_states_1, res_hidden_states_2)
+    for resnet in self.resnets:
+        # pop res hidden states
+        res_hidden_states = res_hidden_states_tuple[-1]
+        res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+
+        hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+        hidden_states = resnet(hidden_states, temb)
+
+    if self.upsamplers is not None:
+        for upsampler in self.upsamplers:
+            hidden_states = upsampler(hidden_states, upsample_size)
+
+    return hidden_states
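# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the two forwards above unroll
# diffusers' variable-length res_hidden_states_tuple into three named tensors,
# presumably so each exported ONNX graph / TensorRT engine binds a fixed set
# of named inputs (see the feed_dicts in cacheunet_forward below). The loop
# then consumes the residuals back to front, like popping a stack:
# ---------------------------------------------------------------------------
_stack = ("res0", "res1", "res2")
_popped = []
while _stack:
    _popped.append(_stack[-1])  # take the most recently pushed residual
    _stack = _stack[:-1]
assert _popped == ["res2", "res1", "res0"]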
+
+
+def cacheunet_forward(
+    self,
+    sample: torch.FloatTensor,
+    timestep: Union[torch.Tensor, float, int],
+    encoder_hidden_states: torch.Tensor,
+    class_labels: Optional[torch.Tensor] = None,
+    timestep_cond: Optional[torch.Tensor] = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+    added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
+    down_block_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
+    mid_block_additional_residual: Optional[torch.Tensor] = None,
+    down_intrablock_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
+    encoder_attention_mask: Optional[torch.Tensor] = None,
+    return_dict: bool = True,
+) -> Union[UNet2DConditionOutput, Tuple]:
+    # 1. time
+    t_emb = self.get_time_embed(sample=sample, timestep=timestep)
+    emb = self.time_embedding(t_emb, timestep_cond)
+
+    aug_emb = self.get_aug_embed(
+        emb=emb,
+        encoder_hidden_states=encoder_hidden_states,
+        added_cond_kwargs=added_cond_kwargs,
+    )
+    emb = emb + aug_emb if aug_emb is not None else emb
+
+    encoder_hidden_states = self.process_encoder_hidden_states(
+        encoder_hidden_states=encoder_hidden_states, added_cond_kwargs=added_cond_kwargs
+    )
+
+    # 2. pre-process
+    sample = self.conv_in(sample)
+
+    if hasattr(self, "_export_precess_onnx") and self._export_precess_onnx:
+        return (
+            sample,
+            encoder_hidden_states,
+            emb,
+        )
+
+    down_block_res_samples = (sample,)
+    for i, downsample_block in enumerate(self.down_blocks):
+        if (
+            hasattr(downsample_block, "has_cross_attention")
+            and downsample_block.has_cross_attention
+        ):
+            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
+                feed_dict = {
+                    "hidden_states": sample,
+                    "temb": emb,
+                    "encoder_hidden_states": encoder_hidden_states,
+                }
+                down_results = self.engines[f"down_blocks.{i}"](feed_dict, self.cuda_stream)
+                sample = down_results["sample"]
+                res_samples_0 = down_results["res_samples_0"]
+                res_samples_1 = down_results["res_samples_1"]
+                if "res_samples_2" in down_results:
+                    res_samples_2 = down_results["res_samples_2"]
+            else:
+                # For t2i-adapter CrossAttnDownBlock2D
+                additional_residuals = {}
+
+                sample, res_samples = downsample_block(
+                    hidden_states=sample,
+                    temb=emb,
+                    encoder_hidden_states=encoder_hidden_states,
+                    attention_mask=attention_mask,
+                    cross_attention_kwargs=cross_attention_kwargs,
+                    encoder_attention_mask=encoder_attention_mask,
+                    **additional_residuals,
+                )
+        else:
+            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
+                feed_dict = {"hidden_states": sample, "temb": emb}
+                down_results = self.engines[f"down_blocks.{i}"](feed_dict, self.cuda_stream)
+                sample = down_results["sample"]
+                res_samples_0 = down_results["res_samples_0"]
+                res_samples_1 = down_results["res_samples_1"]
+                if "res_samples_2" in down_results:
+                    res_samples_2 = down_results["res_samples_2"]
+            else:
+                sample, res_samples = downsample_block(hidden_states=sample, temb=emb)
+
+        if hasattr(self, "use_trt_infer") and self.use_trt_infer:
+            down_block_res_samples += (
+                res_samples_0,
+                res_samples_1,
+            )
+            if "res_samples_2" in down_results:
+                down_block_res_samples += (res_samples_2,)
+        else:
+            down_block_res_samples += res_samples
+
+    if hasattr(self, "use_trt_infer") and self.use_trt_infer:
+        feed_dict = {
+            "hidden_states": sample,
+            "temb": emb,
+            "encoder_hidden_states": encoder_hidden_states,
+        }
+        mid_results = self.engines["mid_block"](feed_dict, self.cuda_stream)
+        sample = mid_results["sample"]
+    else:
+        sample = self.mid_block(
+            sample,
+            emb,
+            encoder_hidden_states=encoder_hidden_states,
+            attention_mask=attention_mask,
+            cross_attention_kwargs=cross_attention_kwargs,
+            encoder_attention_mask=encoder_attention_mask,
+        )
+
+    # 5. up
+    for i, upsample_block in enumerate(self.up_blocks):
+        res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
+        down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]
+
+        if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
+            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
+                feed_dict = {
+                    "hidden_states": sample,
+                    "res_hidden_states_0": res_samples[0],
+                    "res_hidden_states_1": res_samples[1],
+                    "res_hidden_states_2": res_samples[2],
+                    "temb": emb,
+                    "encoder_hidden_states": encoder_hidden_states,
+                }
+                up_results = self.engines[f"up_blocks.{i}"](feed_dict, self.cuda_stream)
+                sample = up_results["sample"]
+            else:
+                sample = upsample_block(
+                    hidden_states=sample,
+                    temb=emb,
+                    res_hidden_states_0=res_samples[0],
+                    res_hidden_states_1=res_samples[1],
+                    res_hidden_states_2=res_samples[2],
+                    encoder_hidden_states=encoder_hidden_states,
+                    cross_attention_kwargs=cross_attention_kwargs,
+                    attention_mask=attention_mask,
+                    encoder_attention_mask=encoder_attention_mask,
+                )
+        else:
+            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
+                feed_dict = {
+                    "hidden_states": sample,
+                    "res_hidden_states_0": res_samples[0],
+                    "res_hidden_states_1": res_samples[1],
+                    "res_hidden_states_2": res_samples[2],
+                    "temb": emb,
+                }
+                up_results = self.engines[f"up_blocks.{i}"](feed_dict, self.cuda_stream)
+                sample = up_results["sample"]
+            else:
+                sample = upsample_block(
+                    hidden_states=sample,
+                    temb=emb,
+                    res_hidden_states_0=res_samples[0],
+                    res_hidden_states_1=res_samples[1],
+                    res_hidden_states_2=res_samples[2],
+                )
+
+    # 6. post-process
+    if self.conv_norm_out:
+        sample = self.conv_norm_out(sample)
+        sample = self.conv_act(sample)
+    sample = self.conv_out(sample)
+
+    if not return_dict:
+        return (sample,)
+
+    return UNet2DConditionOutput(sample=sample)
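# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the SDXL UNet receives its
# extra conditioning through added_cond_kwargs, consumed by get_aug_embed
# above. The shapes below are the usual SDXL-base ones for a batch of 2; they
# are assumptions for the example, not read from this repo.
# ---------------------------------------------------------------------------
import torch

added_cond_kwargs = {
    "text_embeds": torch.randn(2, 1280),  # pooled text embedding
    "time_ids": torch.randn(2, 6),        # original size + crop coords + target size
}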
diff --git a/src/pipe/utils.py b/src/pipe/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..340f2ea58f10e9bf11fa214ce8a55b589b5c0b70
--- /dev/null
+++ b/src/pipe/utils.py
@@ -0,0 +1,129 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+from collections import OrderedDict
+
+import numpy as np
+import tensorrt as trt
+import torch
+from cuda import cudart
+from polygraphy.backend.common import bytes_from_path
+from polygraphy.backend.trt import engine_from_bytes
+
+numpy_to_torch_dtype_dict = {
+    np.uint8: torch.uint8,
+    np.int8: torch.int8,
+    np.int16: torch.int16,
+    np.int32: torch.int32,
+    np.int64: torch.int64,
+    np.float16: torch.float16,
+    np.float32: torch.float32,
+    np.float64: torch.float64,
+    np.complex64: torch.complex64,
+    np.complex128: torch.complex128,
+}
+
+
+class Engine:
+    def __init__(self):
+        self.engine = None
+        self.context = None
+        self.buffers = OrderedDict()
+        self.tensors = OrderedDict()
+        self.cuda_graph_instance = None  # cuda graph
+        self.has_cross_attention = False
+
+    def __del__(self):
+        del self.engine
+        del self.context
+        del self.buffers
+        del self.tensors
+
+    def load(self, engine_path):
+        self.engine = engine_from_bytes(bytes_from_path(engine_path))
+
+    def activate(self, reuse_device_memory=None):
+        if reuse_device_memory:
+            self.context = self.engine.create_execution_context_without_device_memory()  # type: ignore
+            self.context.device_memory = reuse_device_memory
+        else:
+            self.context = self.engine.create_execution_context()  # type: ignore
+
+    def allocate_buffers(self, shape_dict=None, device="cuda", batch_size=1):
+        for binding in range(self.engine.num_io_tensors):  # type: ignore
+            name = self.engine.get_tensor_name(binding)  # type: ignore
+            if shape_dict and name in shape_dict:
+                shape = shape_dict[name]
+            else:
+                shape = self.engine.get_tensor_shape(name)  # type: ignore
+                # Double the batch dimension to account for classifier-free
+                # guidance, which runs conditional and unconditional passes.
+                shape = (batch_size * 2,) + shape[1:]
+            dtype = trt.nptype(self.engine.get_tensor_dtype(name))  # type: ignore
+            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:  # type: ignore
+                self.context.set_input_shape(name, shape)  # type: ignore
+            tensor = torch.empty(tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]).to(
+                device=device
+            )
+            self.tensors[name] = tensor
+
+    def __call__(self, feed_dict, stream, use_cuda_graph=False):
+        for name, buf in feed_dict.items():
+            self.tensors[name].copy_(buf)
+
+        for name, tensor in self.tensors.items():
+            self.context.set_tensor_address(name, tensor.data_ptr())  # type: ignore
+
+        if use_cuda_graph:
+            if self.cuda_graph_instance is not None:
+                cuassert(cudart.cudaGraphLaunch(self.cuda_graph_instance, stream))
+                cuassert(cudart.cudaStreamSynchronize(stream))
+            else:
+                # do inference before CUDA graph capture
+                noerror = self.context.execute_async_v3(stream)  # type: ignore
+                if not noerror:
+                    raise ValueError("ERROR: inference failed.")
+                # capture cuda graph
+                cuassert(
+                    cudart.cudaStreamBeginCapture(
+                        stream, cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal
+                    )
+                )
+                self.context.execute_async_v3(stream)  # type: ignore
+                self.graph = cuassert(cudart.cudaStreamEndCapture(stream))
+                self.cuda_graph_instance = cuassert(cudart.cudaGraphInstantiate(self.graph, 0))
+        else:
+            noerror = self.context.execute_async_v3(stream)  # type: ignore
+            if not noerror:
+                raise ValueError("ERROR: inference failed.")
+
+        return self.tensors
+
+
+def cuassert(cuda_ret):
+    err = cuda_ret[0]
+    if err != cudart.cudaError_t.cudaSuccess:
+        raise RuntimeError(
+            f"CUDA ERROR: {err}, error code reference: https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaError_t"
+        )
+    if len(cuda_ret) > 1:
+        return cuda_ret[1]
+    return None
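# ---------------------------------------------------------------------------
# Usage sketch (illustrative): the lifecycle of the Engine wrapper above. The
# engine path, binding names, shapes, and dtype are assumptions for the
# example; a real caller would take them from the built engine.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    engine = Engine()
    engine.load("engines/mid_block.plan")  # hypothetical engine file
    engine.activate()                      # context owns its device memory here
    engine.allocate_buffers(
        shape_dict={"hidden_states": (2, 1280, 32, 32), "temb": (2, 1280)},
        device="cuda",
    )
    stream = cuassert(cudart.cudaStreamCreate())
    feed = {
        "hidden_states": torch.randn(2, 1280, 32, 32, device="cuda", dtype=torch.float16),
        "temb": torch.randn(2, 1280, device="cuda", dtype=torch.float16),
    }
    outputs = engine(feed, stream)  # returns the dict of preallocated tensors
    cuassert(cudart.cudaStreamSynchronize(stream))
    cuassert(cudart.cudaStreamDestroy(stream))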