Initial commit with folder contents
Browse files- .gitattributes +326 -5
- .gitignore +2 -0
- .gitmodules +3 -0
- engine/down_blocks.0.plan +3 -0
- engine/down_blocks.1.plan +3 -0
- engine/down_blocks.2.plan +3 -0
- engine/mid_block.plan +3 -0
- engine/up_blocks.0.plan +3 -0
- engine/up_blocks.1.plan +3 -0
- engine/up_blocks.2.plan +3 -0
- loss_params.pth +2 -2
- pyproject.toml +10 -19
- src/cache_diffusion/cachify.py +144 -0
- src/cache_diffusion/module.py +55 -0
- src/cache_diffusion/utils.py +61 -0
- src/pipeline.py +46 -974
- src/trt_pipeline/config.py +162 -0
- src/trt_pipeline/deploy.py +144 -0
- src/trt_pipeline/models/sd3.py +159 -0
- src/trt_pipeline/models/sdxl.py +275 -0
- src/trt_pipeline/utils.py +129 -0
- uv.lock +116 -291
.gitattributes
CHANGED
|
@@ -33,8 +33,329 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
onnx/up_blocks.0/onnx__MatMul_6936 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
onnx/up_blocks.0/onnx__MatMul_6920 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
onnx/up_blocks.0/onnx__MatMul_6912 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
onnx/up_blocks.0/onnx__MatMul_7600 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
onnx/up_blocks.0/onnx__MatMul_6955 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
onnx/up_blocks.0/onnx__MatMul_6958 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
onnx/up_blocks.0/resnets.0.time_emb_proj.weight filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
onnx/up_blocks.0/onnx__MatMul_7203 filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
onnx/up_blocks.0/onnx__MatMul_7624 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
onnx/up_blocks.0/onnx__MatMul_6910 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
onnx/up_blocks.0/onnx__MatMul_6969 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
onnx/up_blocks.0/onnx__MatMul_6909 filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
onnx/up_blocks.0/onnx__MatMul_6911 filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
onnx/up_blocks.0/onnx__MatMul_6935 filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
onnx/up_blocks.0/onnx__MatMul_7003 filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
onnx/up_blocks.0/onnx__MatMul_6979 filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
onnx/up_blocks.0/resnets.2.time_emb_proj.weight filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
onnx/up_blocks.0/onnx__MatMul_7193 filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
onnx/up_blocks.0/onnx__MatMul_6921 filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
onnx/up_blocks.0/onnx__MatMul_7017 filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
onnx/up_blocks.0/onnx__MatMul_7609 filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
onnx/up_blocks.0/onnx__MatMul_6982 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
onnx/up_blocks.0/onnx__MatMul_7169 filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
onnx/up_blocks.0/onnx__MatMul_6984 filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
onnx/up_blocks.0/onnx__MatMul_7182 filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
onnx/up_blocks.0/onnx__MatMul_7031 filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
onnx/up_blocks.0/onnx__MatMul_7041 filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
onnx/up_blocks.0/onnx__MatMul_6934 filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
onnx/up_blocks.0/onnx__MatMul_7619 filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
onnx/up_blocks.0/onnx__MatMul_6931 filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
onnx/up_blocks.0/onnx__MatMul_6968 filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
onnx/up_blocks.0/onnx__MatMul_7160 filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
onnx/up_blocks.0/onnx__MatMul_7008 filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
onnx/up_blocks.0/onnx__MatMul_6945 filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
onnx/up_blocks.0/onnx__MatMul_7632 filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
onnx/up_blocks.0/onnx__MatMul_7168 filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
onnx/up_blocks.0/onnx__MatMul_7007 filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
onnx/up_blocks.0/onnx__MatMul_7030 filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
onnx/up_blocks.0/resnets.1.time_emb_proj.weight filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
onnx/up_blocks.0/onnx__MatMul_7103 filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
onnx/up_blocks.0/onnx__MatMul_7040 filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
onnx/up_blocks.0/onnx__MatMul_7065 filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
onnx/up_blocks.0/onnx__MatMul_7157 filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
onnx/up_blocks.0/onnx__MatMul_7055 filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
onnx/up_blocks.0/onnx__MatMul_7006 filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
onnx/up_blocks.0/onnx__MatMul_7089 filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
onnx/up_blocks.0/onnx__MatMul_6993 filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
onnx/up_blocks.0/resnets.2.conv_shortcut.weight filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
onnx/up_blocks.0/onnx__MatMul_7054 filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
onnx/up_blocks.0/onnx__MatMul_7099 filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
onnx/up_blocks.0/onnx__MatMul_7158 filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
onnx/up_blocks.0/onnx__MatMul_7137 filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
onnx/up_blocks.0/onnx__MatMul_7027 filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
onnx/up_blocks.0/onnx__MatMul_7147 filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
onnx/up_blocks.0/onnx__MatMul_7112 filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
onnx/up_blocks.0/onnx__MatMul_7128 filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
onnx/up_blocks.0/onnx__MatMul_7104 filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
onnx/up_blocks.0/onnx__MatMul_7016 filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
onnx/up_blocks.0/onnx__MatMul_6960 filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
onnx/up_blocks.0/onnx__MatMul_6994 filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
onnx/up_blocks.0/onnx__MatMul_7126 filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
onnx/up_blocks.0/onnx__MatMul_7018 filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
onnx/up_blocks.0/onnx__MatMul_6922 filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
onnx/up_blocks.0/onnx__MatMul_6970 filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
onnx/up_blocks.0/onnx__MatMul_7611 filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
onnx/up_blocks.0/onnx__MatMul_7042 filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
onnx/up_blocks.0/onnx__MatMul_6947 filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
onnx/up_blocks.0/onnx__MatMul_7019 filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
onnx/up_blocks.0/onnx__MatMul_7634 filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
onnx/up_blocks.0/onnx__MatMul_7192 filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
onnx/up_blocks.0/onnx__MatMul_6971 filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
onnx/up_blocks.0/resnets.0.conv_shortcut.weight filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
onnx/up_blocks.0/onnx__MatMul_7633 filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
onnx/up_blocks.0/onnx__MatMul_7090 filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
onnx/up_blocks.0/onnx__MatMul_7241 filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
onnx/up_blocks.0/onnx__MatMul_7032 filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
onnx/up_blocks.0/onnx__MatMul_6995 filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
onnx/up_blocks.0/onnx__MatMul_6946 filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
onnx/up_blocks.0/onnx__MatMul_7227 filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
onnx/up_blocks.0/onnx__MatMul_7635 filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
onnx/up_blocks.0/onnx__MatMul_7232 filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
onnx/up_blocks.0/onnx__MatMul_7043 filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
onnx/up_blocks.0/onnx__MatMul_7136 filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
onnx/up_blocks.0/onnx__MatMul_7587 filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
onnx/up_blocks.0/onnx__MatMul_7127 filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
onnx/up_blocks.0/onnx__MatMul_7088 filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
onnx/up_blocks.0/onnx__MatMul_7254 filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
onnx/up_blocks.0/onnx__MatMul_6923 filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
onnx/up_blocks.0/onnx__MatMul_7080 filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
onnx/up_blocks.0/onnx__MatMul_7056 filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
onnx/up_blocks.0/onnx__MatMul_7123 filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
onnx/up_blocks.0/onnx__MatMul_7075 filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
onnx/up_blocks.0/resnets.1.conv_shortcut.weight filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
onnx/up_blocks.0/onnx__MatMul_7064 filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
onnx/up_blocks.0/onnx__MatMul_7051 filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
onnx/up_blocks.0/onnx__MatMul_7079 filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
onnx/up_blocks.0/onnx__MatMul_7347 filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
onnx/up_blocks.0/onnx__MatMul_7371 filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
onnx/up_blocks.0/onnx__MatMul_7102 filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
onnx/up_blocks.0/onnx__MatMul_7078 filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
onnx/up_blocks.0/onnx__MatMul_7336 filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
onnx/up_blocks.0/onnx__MatMul_7360 filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
onnx/up_blocks.0/onnx__MatMul_7361 filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
onnx/up_blocks.0/onnx__MatMul_7179 filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
onnx/up_blocks.0/onnx__MatMul_7159 filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
onnx/up_blocks.0/onnx__MatMul_7184 filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
onnx/up_blocks.0/onnx__MatMul_7113 filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
onnx/up_blocks.0/onnx__MatMul_7328 filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
onnx/up_blocks.0/onnx__MatMul_7376 filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
onnx/up_blocks.0/onnx__MatMul_7337 filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
onnx/up_blocks.0/onnx__MatMul_7115 filter=lfs diff=lfs merge=lfs -text
|
| 147 |
+
onnx/up_blocks.0/onnx__MatMul_7171 filter=lfs diff=lfs merge=lfs -text
|
| 148 |
+
onnx/up_blocks.0/onnx__MatMul_7407 filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
onnx/up_blocks.0/onnx__MatMul_7256 filter=lfs diff=lfs merge=lfs -text
|
| 150 |
+
onnx/up_blocks.0/onnx__MatMul_7216 filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
onnx/up_blocks.0/onnx__MatMul_7326 filter=lfs diff=lfs merge=lfs -text
|
| 152 |
+
onnx/up_blocks.0/onnx__MatMul_7251 filter=lfs diff=lfs merge=lfs -text
|
| 153 |
+
onnx/up_blocks.0/onnx__MatMul_7208 filter=lfs diff=lfs merge=lfs -text
|
| 154 |
+
onnx/up_blocks.0/onnx__MatMul_7303 filter=lfs diff=lfs merge=lfs -text
|
| 155 |
+
onnx/up_blocks.0/onnx__MatMul_7217 filter=lfs diff=lfs merge=lfs -text
|
| 156 |
+
onnx/up_blocks.0/onnx__MatMul_7350 filter=lfs diff=lfs merge=lfs -text
|
| 157 |
+
onnx/up_blocks.0/onnx__MatMul_7231 filter=lfs diff=lfs merge=lfs -text
|
| 158 |
+
onnx/up_blocks.0/onnx__MatMul_7183 filter=lfs diff=lfs merge=lfs -text
|
| 159 |
+
onnx/up_blocks.0/onnx__MatMul_7265 filter=lfs diff=lfs merge=lfs -text
|
| 160 |
+
onnx/up_blocks.0/onnx__MatMul_7240 filter=lfs diff=lfs merge=lfs -text
|
| 161 |
+
onnx/up_blocks.0/onnx__MatMul_7206 filter=lfs diff=lfs merge=lfs -text
|
| 162 |
+
onnx/up_blocks.0/onnx__MatMul_7323 filter=lfs diff=lfs merge=lfs -text
|
| 163 |
+
onnx/up_blocks.0/onnx__MatMul_7207 filter=lfs diff=lfs merge=lfs -text
|
| 164 |
+
onnx/up_blocks.0/onnx__MatMul_7289 filter=lfs diff=lfs merge=lfs -text
|
| 165 |
+
onnx/up_blocks.0/onnx__MatMul_7091 filter=lfs diff=lfs merge=lfs -text
|
| 166 |
+
onnx/up_blocks.0/onnx__MatMul_7230 filter=lfs diff=lfs merge=lfs -text
|
| 167 |
+
onnx/up_blocks.0/onnx__MatMul_7066 filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
onnx/up_blocks.0/onnx__MatMul_6944 filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
onnx/up_blocks.0/onnx__MatMul_7067 filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
onnx/up_blocks.0/onnx__MatMul_7290 filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
onnx/up_blocks.0/onnx__MatMul_6983 filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
onnx/up_blocks.0/onnx__MatMul_7139 filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
onnx/up_blocks.0/onnx__MatMul_7302 filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
onnx/up_blocks.0/onnx__MatMul_6992 filter=lfs diff=lfs merge=lfs -text
|
| 175 |
+
onnx/up_blocks.0/onnx__MatMul_7315 filter=lfs diff=lfs merge=lfs -text
|
| 176 |
+
onnx/up_blocks.0/onnx__MatMul_6959 filter=lfs diff=lfs merge=lfs -text
|
| 177 |
+
onnx/up_blocks.0/onnx__MatMul_7299 filter=lfs diff=lfs merge=lfs -text
|
| 178 |
+
onnx/up_blocks.0/onnx__MatMul_7386 filter=lfs diff=lfs merge=lfs -text
|
| 179 |
+
onnx/up_blocks.0/onnx__MatMul_7138 filter=lfs diff=lfs merge=lfs -text
|
| 180 |
+
onnx/up_blocks.0/onnx__MatMul_7363 filter=lfs diff=lfs merge=lfs -text
|
| 181 |
+
onnx/up_blocks.0/onnx__MatMul_7312 filter=lfs diff=lfs merge=lfs -text
|
| 182 |
+
onnx/up_blocks.0/onnx__MatMul_7114 filter=lfs diff=lfs merge=lfs -text
|
| 183 |
+
onnx/up_blocks.0/onnx__MatMul_7219 filter=lfs diff=lfs merge=lfs -text
|
| 184 |
+
onnx/up_blocks.0/onnx__MatMul_7150 filter=lfs diff=lfs merge=lfs -text
|
| 185 |
+
onnx/up_blocks.0/onnx__MatMul_7195 filter=lfs diff=lfs merge=lfs -text
|
| 186 |
+
onnx/up_blocks.0/onnx__MatMul_7351 filter=lfs diff=lfs merge=lfs -text
|
| 187 |
+
onnx/up_blocks.0/onnx__MatMul_7264 filter=lfs diff=lfs merge=lfs -text
|
| 188 |
+
onnx/up_blocks.0/onnx__MatMul_7327 filter=lfs diff=lfs merge=lfs -text
|
| 189 |
+
onnx/up_blocks.0/onnx__MatMul_7280 filter=lfs diff=lfs merge=lfs -text
|
| 190 |
+
onnx/up_blocks.0/onnx__MatMul_7170 filter=lfs diff=lfs merge=lfs -text
|
| 191 |
+
onnx/up_blocks.0/onnx__MatMul_7313 filter=lfs diff=lfs merge=lfs -text
|
| 192 |
+
onnx/up_blocks.0/onnx__MatMul_7255 filter=lfs diff=lfs merge=lfs -text
|
| 193 |
+
onnx/up_blocks.0/onnx__MatMul_7242 filter=lfs diff=lfs merge=lfs -text
|
| 194 |
+
onnx/up_blocks.0/onnx__MatMul_7278 filter=lfs diff=lfs merge=lfs -text
|
| 195 |
+
onnx/up_blocks.0/onnx__MatMul_7291 filter=lfs diff=lfs merge=lfs -text
|
| 196 |
+
onnx/up_blocks.0/onnx__MatMul_7279 filter=lfs diff=lfs merge=lfs -text
|
| 197 |
+
onnx/up_blocks.0/onnx__MatMul_7288 filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
onnx/up_blocks.0/onnx__MatMul_7304 filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
onnx/up_blocks.0/onnx__MatMul_7352 filter=lfs diff=lfs merge=lfs -text
|
| 200 |
+
onnx/up_blocks.0/onnx__MatMul_7218 filter=lfs diff=lfs merge=lfs -text
|
| 201 |
+
onnx/up_blocks.0/onnx__MatMul_7499 filter=lfs diff=lfs merge=lfs -text
|
| 202 |
+
engine/down_blocks.0.plan filter=lfs diff=lfs merge=lfs -text
|
| 203 |
+
onnx/up_blocks.0/onnx__MatMul_7432 filter=lfs diff=lfs merge=lfs -text
|
| 204 |
+
onnx/up_blocks.0/onnx__MatMul_7503 filter=lfs diff=lfs merge=lfs -text
|
| 205 |
+
onnx/up_blocks.0/onnx__MatMul_7480 filter=lfs diff=lfs merge=lfs -text
|
| 206 |
+
onnx/up_blocks.0/onnx__MatMul_7488 filter=lfs diff=lfs merge=lfs -text
|
| 207 |
+
onnx/up_blocks.0/onnx__MatMul_7464 filter=lfs diff=lfs merge=lfs -text
|
| 208 |
+
onnx/up_blocks.0/onnx__MatMul_7475 filter=lfs diff=lfs merge=lfs -text
|
| 209 |
+
onnx/up_blocks.0/onnx__MatMul_7502 filter=lfs diff=lfs merge=lfs -text
|
| 210 |
+
onnx/up_blocks.0/onnx__MatMul_7513 filter=lfs diff=lfs merge=lfs -text
|
| 211 |
+
onnx/up_blocks.0/onnx__MatMul_7527 filter=lfs diff=lfs merge=lfs -text
|
| 212 |
+
onnx/up_blocks.0/onnx__MatMul_7537 filter=lfs diff=lfs merge=lfs -text
|
| 213 |
+
onnx/up_blocks.0/onnx__MatMul_7275 filter=lfs diff=lfs merge=lfs -text
|
| 214 |
+
onnx/up_blocks.0/onnx__MatMul_7430 filter=lfs diff=lfs merge=lfs -text
|
| 215 |
+
onnx/up_blocks.0/onnx__MatMul_7267 filter=lfs diff=lfs merge=lfs -text
|
| 216 |
+
onnx/up_blocks.0/onnx__MatMul_7465 filter=lfs diff=lfs merge=lfs -text
|
| 217 |
+
onnx/up_blocks.0/onnx__MatMul_7405 filter=lfs diff=lfs merge=lfs -text
|
| 218 |
+
onnx/up_blocks.0/onnx__MatMul_7512 filter=lfs diff=lfs merge=lfs -text
|
| 219 |
+
onnx/up_blocks.0/onnx__MatMul_7339 filter=lfs diff=lfs merge=lfs -text
|
| 220 |
+
onnx/up_blocks.0/onnx__MatMul_7427 filter=lfs diff=lfs merge=lfs -text
|
| 221 |
+
onnx/up_blocks.0/onnx__MatMul_7194 filter=lfs diff=lfs merge=lfs -text
|
| 222 |
+
onnx/up_blocks.0/onnx__MatMul_7243 filter=lfs diff=lfs merge=lfs -text
|
| 223 |
+
onnx/up_blocks.0/onnx__MatMul_7621 filter=lfs diff=lfs merge=lfs -text
|
| 224 |
+
onnx/up_blocks.0/onnx__MatMul_7266 filter=lfs diff=lfs merge=lfs -text
|
| 225 |
+
onnx/up_blocks.0/onnx__MatMul_7456 filter=lfs diff=lfs merge=lfs -text
|
| 226 |
+
onnx/up_blocks.0/onnx__MatMul_7395 filter=lfs diff=lfs merge=lfs -text
|
| 227 |
+
onnx/up_blocks.0/onnx__MatMul_7077 filter=lfs diff=lfs merge=lfs -text
|
| 228 |
+
onnx/up_blocks.0/onnx__MatMul_7417 filter=lfs diff=lfs merge=lfs -text
|
| 229 |
+
onnx/up_blocks.0/onnx__MatMul_7585 filter=lfs diff=lfs merge=lfs -text
|
| 230 |
+
onnx/up_blocks.0/onnx__MatMul_7314 filter=lfs diff=lfs merge=lfs -text
|
| 231 |
+
onnx/up_blocks.0/onnx__MatMul_7623 filter=lfs diff=lfs merge=lfs -text
|
| 232 |
+
onnx/up_blocks.0/onnx__MatMul_7443 filter=lfs diff=lfs merge=lfs -text
|
| 233 |
+
onnx/up_blocks.0/onnx__MatMul_7419 filter=lfs diff=lfs merge=lfs -text
|
| 234 |
+
onnx/up_blocks.0/onnx__MatMul_7643 filter=lfs diff=lfs merge=lfs -text
|
| 235 |
+
onnx/up_blocks.0/onnx__MatMul_7489 filter=lfs diff=lfs merge=lfs -text
|
| 236 |
+
onnx/up_blocks.0/onnx__MatMul_7455 filter=lfs diff=lfs merge=lfs -text
|
| 237 |
+
onnx/up_blocks.0/onnx__MatMul_7479 filter=lfs diff=lfs merge=lfs -text
|
| 238 |
+
onnx/up_blocks.0/onnx__MatMul_7549 filter=lfs diff=lfs merge=lfs -text
|
| 239 |
+
onnx/up_blocks.0/onnx__MatMul_6957 filter=lfs diff=lfs merge=lfs -text
|
| 240 |
+
onnx/up_blocks.0/onnx__MatMul_6933 filter=lfs diff=lfs merge=lfs -text
|
| 241 |
+
onnx/up_blocks.0/onnx__MatMul_7504 filter=lfs diff=lfs merge=lfs -text
|
| 242 |
+
onnx/up_blocks.0/onnx__MatMul_7431 filter=lfs diff=lfs merge=lfs -text
|
| 243 |
+
onnx/up_blocks.0/onnx__MatMul_7441 filter=lfs diff=lfs merge=lfs -text
|
| 244 |
+
onnx/up_blocks.0/onnx__MatMul_7440 filter=lfs diff=lfs merge=lfs -text
|
| 245 |
+
onnx/up_blocks.0/onnx__MatMul_7408 filter=lfs diff=lfs merge=lfs -text
|
| 246 |
+
onnx/up_blocks.0/onnx__MatMul_7466 filter=lfs diff=lfs merge=lfs -text
|
| 247 |
+
onnx/up_blocks.0/onnx__MatMul_7416 filter=lfs diff=lfs merge=lfs -text
|
| 248 |
+
onnx/up_blocks.0/onnx__MatMul_7029 filter=lfs diff=lfs merge=lfs -text
|
| 249 |
+
onnx/up_blocks.0/onnx__MatMul_7406 filter=lfs diff=lfs merge=lfs -text
|
| 250 |
+
onnx/up_blocks.0/onnx__MatMul_7552 filter=lfs diff=lfs merge=lfs -text
|
| 251 |
+
onnx/up_blocks.0/onnx__MatMul_7451 filter=lfs diff=lfs merge=lfs -text
|
| 252 |
+
onnx/up_blocks.0/onnx__MatMul_7053 filter=lfs diff=lfs merge=lfs -text
|
| 253 |
+
onnx/up_blocks.0/onnx__MatMul_7005 filter=lfs diff=lfs merge=lfs -text
|
| 254 |
+
onnx/up_blocks.0/onnx__MatMul_7646 filter=lfs diff=lfs merge=lfs -text
|
| 255 |
+
onnx/up_blocks.0/onnx__MatMul_7560 filter=lfs diff=lfs merge=lfs -text
|
| 256 |
+
onnx/up_blocks.0/onnx__MatMul_7338 filter=lfs diff=lfs merge=lfs -text
|
| 257 |
+
onnx/up_blocks.0/onnx__MatMul_7595 filter=lfs diff=lfs merge=lfs -text
|
| 258 |
+
onnx/up_blocks.0/onnx__MatMul_7362 filter=lfs diff=lfs merge=lfs -text
|
| 259 |
+
onnx/up_blocks.0/onnx__MatMul_7574 filter=lfs diff=lfs merge=lfs -text
|
| 260 |
+
onnx/up_blocks.0/onnx__MatMul_7454 filter=lfs diff=lfs merge=lfs -text
|
| 261 |
+
onnx/up_blocks.0/onnx__MatMul_7645 filter=lfs diff=lfs merge=lfs -text
|
| 262 |
+
onnx/up_blocks.0/onnx__MatMul_7622 filter=lfs diff=lfs merge=lfs -text
|
| 263 |
+
onnx/up_blocks.0/onnx__MatMul_7478 filter=lfs diff=lfs merge=lfs -text
|
| 264 |
+
onnx/up_blocks.0/onnx__MatMul_7598 filter=lfs diff=lfs merge=lfs -text
|
| 265 |
+
onnx/up_blocks.0/onnx__MatMul_7608 filter=lfs diff=lfs merge=lfs -text
|
| 266 |
+
onnx/up_blocks.0/onnx__MatMul_7575 filter=lfs diff=lfs merge=lfs -text
|
| 267 |
+
onnx/up_blocks.0/onnx__MatMul_7561 filter=lfs diff=lfs merge=lfs -text
|
| 268 |
+
onnx/up_blocks.0/onnx__MatMul_7563 filter=lfs diff=lfs merge=lfs -text
|
| 269 |
+
onnx/up_blocks.0/onnx__MatMul_7375 filter=lfs diff=lfs merge=lfs -text
|
| 270 |
+
onnx/up_blocks.0/onnx__MatMul_7385 filter=lfs diff=lfs merge=lfs -text
|
| 271 |
+
onnx/up_blocks.0/onnx__MatMul_7584 filter=lfs diff=lfs merge=lfs -text
|
| 272 |
+
onnx/up_blocks.0/onnx__MatMul_7384 filter=lfs diff=lfs merge=lfs -text
|
| 273 |
+
onnx/up_blocks.0/onnx__MatMul_7467 filter=lfs diff=lfs merge=lfs -text
|
| 274 |
+
onnx/up_blocks.0/onnx__MatMul_7599 filter=lfs diff=lfs merge=lfs -text
|
| 275 |
+
onnx/up_blocks.0/onnx__MatMul_7551 filter=lfs diff=lfs merge=lfs -text
|
| 276 |
+
onnx/up_blocks.0/onnx__MatMul_7571 filter=lfs diff=lfs merge=lfs -text
|
| 277 |
+
onnx/up_blocks.0/onnx__MatMul_7125 filter=lfs diff=lfs merge=lfs -text
|
| 278 |
+
onnx/up_blocks.0/onnx__MatMul_7442 filter=lfs diff=lfs merge=lfs -text
|
| 279 |
+
onnx/up_blocks.0/onnx__MatMul_7528 filter=lfs diff=lfs merge=lfs -text
|
| 280 |
+
onnx/up_blocks.0/onnx__MatMul_7576 filter=lfs diff=lfs merge=lfs -text
|
| 281 |
+
onnx/up_blocks.0/onnx__MatMul_7398 filter=lfs diff=lfs merge=lfs -text
|
| 282 |
+
onnx/up_blocks.0/onnx__MatMul_7418 filter=lfs diff=lfs merge=lfs -text
|
| 283 |
+
onnx/up_blocks.0/onnx__MatMul_7562 filter=lfs diff=lfs merge=lfs -text
|
| 284 |
+
onnx/up_blocks.0/onnx__MatMul_7374 filter=lfs diff=lfs merge=lfs -text
|
| 285 |
+
onnx/up_blocks.0/onnx__MatMul_7610 filter=lfs diff=lfs merge=lfs -text
|
| 286 |
+
onnx/up_blocks.0/onnx__MatMul_7101 filter=lfs diff=lfs merge=lfs -text
|
| 287 |
+
onnx/up_blocks.0/onnx__MatMul_7523 filter=lfs diff=lfs merge=lfs -text
|
| 288 |
+
onnx/up_blocks.0/onnx__MatMul_7526 filter=lfs diff=lfs merge=lfs -text
|
| 289 |
+
onnx/up_blocks.0/onnx__MatMul_7536 filter=lfs diff=lfs merge=lfs -text
|
| 290 |
+
onnx/up_blocks.0/onnx__MatMul_7586 filter=lfs diff=lfs merge=lfs -text
|
| 291 |
+
onnx/up_blocks.0/onnx__MatMul_6981 filter=lfs diff=lfs merge=lfs -text
|
| 292 |
+
onnx/up_blocks.0/onnx__MatMul_7387 filter=lfs diff=lfs merge=lfs -text
|
| 293 |
+
onnx/up_blocks.0/onnx__MatMul_7277 filter=lfs diff=lfs merge=lfs -text
|
| 294 |
+
onnx/up_blocks.0/onnx__MatMul_7547 filter=lfs diff=lfs merge=lfs -text
|
| 295 |
+
onnx/up_blocks.0/onnx__MatMul_7539 filter=lfs diff=lfs merge=lfs -text
|
| 296 |
+
onnx/up_blocks.0/onnx__MatMul_7550 filter=lfs diff=lfs merge=lfs -text
|
| 297 |
+
onnx/up_blocks.0/onnx__MatMul_7515 filter=lfs diff=lfs merge=lfs -text
|
| 298 |
+
onnx/up_blocks.0/onnx__MatMul_7491 filter=lfs diff=lfs merge=lfs -text
|
| 299 |
+
onnx/up_blocks.0/onnx__MatMul_7514 filter=lfs diff=lfs merge=lfs -text
|
| 300 |
+
onnx/up_blocks.0/onnx__MatMul_7538 filter=lfs diff=lfs merge=lfs -text
|
| 301 |
+
onnx/up_blocks.0/onnx__MatMul_7149 filter=lfs diff=lfs merge=lfs -text
|
| 302 |
+
onnx/up_blocks.0/onnx__MatMul_7490 filter=lfs diff=lfs merge=lfs -text
|
| 303 |
+
onnx/up_blocks.0/onnx__MatMul_7229 filter=lfs diff=lfs merge=lfs -text
|
| 304 |
+
onnx/up_blocks.0/onnx__MatMul_7573 filter=lfs diff=lfs merge=lfs -text
|
| 305 |
+
onnx/up_blocks.0/onnx__MatMul_7597 filter=lfs diff=lfs merge=lfs -text
|
| 306 |
+
onnx/up_blocks.0/onnx__MatMul_7181 filter=lfs diff=lfs merge=lfs -text
|
| 307 |
+
onnx/up_blocks.0/onnx__MatMul_7373 filter=lfs diff=lfs merge=lfs -text
|
| 308 |
+
onnx/up_blocks.0/onnx__MatMul_7253 filter=lfs diff=lfs merge=lfs -text
|
| 309 |
+
onnx/up_blocks.0/onnx__MatMul_7349 filter=lfs diff=lfs merge=lfs -text
|
| 310 |
+
onnx/up_blocks.0/onnx__MatMul_7301 filter=lfs diff=lfs merge=lfs -text
|
| 311 |
+
onnx/up_blocks.0/onnx__MatMul_7325 filter=lfs diff=lfs merge=lfs -text
|
| 312 |
+
onnx/up_blocks.0/onnx__MatMul_7205 filter=lfs diff=lfs merge=lfs -text
|
| 313 |
+
onnx/up_blocks.0/onnx__MatMul_7501 filter=lfs diff=lfs merge=lfs -text
|
| 314 |
+
onnx/up_blocks.0/onnx__MatMul_7429 filter=lfs diff=lfs merge=lfs -text
|
| 315 |
+
onnx/up_blocks.0/onnx__MatMul_7397 filter=lfs diff=lfs merge=lfs -text
|
| 316 |
+
onnx/up_blocks.0/onnx__MatMul_7453 filter=lfs diff=lfs merge=lfs -text
|
| 317 |
+
onnx/up_blocks.0/onnx__MatMul_6932 filter=lfs diff=lfs merge=lfs -text
|
| 318 |
+
onnx/up_blocks.0/onnx__MatMul_7348 filter=lfs diff=lfs merge=lfs -text
|
| 319 |
+
engine/up_blocks.2.plan filter=lfs diff=lfs merge=lfs -text
|
| 320 |
+
onnx/up_blocks.0/onnx__MatMul_7525 filter=lfs diff=lfs merge=lfs -text
|
| 321 |
+
onnx/up_blocks.0/onnx__MatMul_7477 filter=lfs diff=lfs merge=lfs -text
|
| 322 |
+
onnx/up_blocks.0/onnx__MatMul_7500 filter=lfs diff=lfs merge=lfs -text
|
| 323 |
+
onnx/up_blocks.0/onnx__MatMul_7644 filter=lfs diff=lfs merge=lfs -text
|
| 324 |
+
onnx/up_blocks.0/onnx__MatMul_6980 filter=lfs diff=lfs merge=lfs -text
|
| 325 |
+
onnx/up_blocks.0/onnx__MatMul_7428 filter=lfs diff=lfs merge=lfs -text
|
| 326 |
+
onnx/up_blocks.0/onnx__MatMul_6956 filter=lfs diff=lfs merge=lfs -text
|
| 327 |
+
onnx/up_blocks.0/onnx__MatMul_7572 filter=lfs diff=lfs merge=lfs -text
|
| 328 |
+
onnx/up_blocks.0/onnx__MatMul_7620 filter=lfs diff=lfs merge=lfs -text
|
| 329 |
+
onnx/up_blocks.0/resnets.0.conv2.weight filter=lfs diff=lfs merge=lfs -text
|
| 330 |
+
onnx/up_blocks.0/onnx__MatMul_7004 filter=lfs diff=lfs merge=lfs -text
|
| 331 |
+
onnx/up_blocks.0/resnets.2.conv2.weight filter=lfs diff=lfs merge=lfs -text
|
| 332 |
+
onnx/up_blocks.0/onnx__MatMul_7052 filter=lfs diff=lfs merge=lfs -text
|
| 333 |
+
onnx/up_blocks.0/onnx__MatMul_7148 filter=lfs diff=lfs merge=lfs -text
|
| 334 |
+
onnx/up_blocks.0/onnx__MatMul_7252 filter=lfs diff=lfs merge=lfs -text
|
| 335 |
+
onnx/up_blocks.0/onnx__MatMul_7100 filter=lfs diff=lfs merge=lfs -text
|
| 336 |
+
onnx/up_blocks.0/onnx__MatMul_7300 filter=lfs diff=lfs merge=lfs -text
|
| 337 |
+
onnx/up_blocks.0/onnx__MatMul_7452 filter=lfs diff=lfs merge=lfs -text
|
| 338 |
+
onnx/up_blocks.0/upsamplers.0.conv.weight filter=lfs diff=lfs merge=lfs -text
|
| 339 |
+
onnx/up_blocks.0/onnx__MatMul_7524 filter=lfs diff=lfs merge=lfs -text
|
| 340 |
+
onnx/up_blocks.0/onnx__MatMul_7476 filter=lfs diff=lfs merge=lfs -text
|
| 341 |
+
onnx/up_blocks.0/onnx__MatMul_7028 filter=lfs diff=lfs merge=lfs -text
|
| 342 |
+
onnx/up_blocks.0/resnets.1.conv2.weight filter=lfs diff=lfs merge=lfs -text
|
| 343 |
+
onnx/up_blocks.0/onnx__MatMul_7124 filter=lfs diff=lfs merge=lfs -text
|
| 344 |
+
onnx/up_blocks.0/onnx__MatMul_7396 filter=lfs diff=lfs merge=lfs -text
|
| 345 |
+
onnx/up_blocks.0/onnx__MatMul_7076 filter=lfs diff=lfs merge=lfs -text
|
| 346 |
+
onnx/up_blocks.0/onnx__MatMul_7372 filter=lfs diff=lfs merge=lfs -text
|
| 347 |
+
onnx/up_blocks.0/onnx__MatMul_7180 filter=lfs diff=lfs merge=lfs -text
|
| 348 |
+
onnx/up_blocks.0/onnx__MatMul_7228 filter=lfs diff=lfs merge=lfs -text
|
| 349 |
+
onnx/up_blocks.0/onnx__MatMul_7204 filter=lfs diff=lfs merge=lfs -text
|
| 350 |
+
onnx/up_blocks.0/onnx__MatMul_7548 filter=lfs diff=lfs merge=lfs -text
|
| 351 |
+
onnx/up_blocks.0/resnets.2.conv1.weight filter=lfs diff=lfs merge=lfs -text
|
| 352 |
+
onnx/up_blocks.0/onnx__MatMul_7324 filter=lfs diff=lfs merge=lfs -text
|
| 353 |
+
onnx/up_blocks.0/onnx__MatMul_7276 filter=lfs diff=lfs merge=lfs -text
|
| 354 |
+
onnx/up_blocks.0/onnx__MatMul_7596 filter=lfs diff=lfs merge=lfs -text
|
| 355 |
+
onnx/up_blocks.0/resnets.1.conv1.weight filter=lfs diff=lfs merge=lfs -text
|
| 356 |
+
onnx/up_blocks.0/resnets.0.conv1.weight filter=lfs diff=lfs merge=lfs -text
|
| 357 |
+
engine/down_blocks.1.plan filter=lfs diff=lfs merge=lfs -text
|
| 358 |
+
engine/up_blocks.1.plan filter=lfs diff=lfs merge=lfs -text
|
| 359 |
+
engine/mid_block.plan filter=lfs diff=lfs merge=lfs -text
|
| 360 |
+
engine/down_blocks.2.plan filter=lfs diff=lfs merge=lfs -text
|
| 361 |
+
engine/up_blocks.0.plan filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
**/__pycache__
|
| 2 |
+
**.egg-info
|
.gitmodules
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[submodule "models/newdream-sdxl-20"]
|
| 2 |
+
path = models/newdream-sdxl-20
|
| 3 |
+
url = https://huggingface.co/stablediffusionapi/newdream-sdxl-20
|
engine/down_blocks.0.plan
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c4b70d1a0416aa494a3d7759349f9637c36c4e26729c3190f5a16bfc497694b
|
| 3 |
+
size 11712396
|
engine/down_blocks.1.plan
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21baa4ea0b4740f3cec8c35825dd6cc4dca7dc39cda3f6be8eab2ce1cd3e834f
|
| 3 |
+
size 124421828
|
engine/down_blocks.2.plan
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4412bf1d85e21eaac612aa9319b33f35b7e83d7858cca597bb6e645fa7bc7207
|
| 3 |
+
size 1522617884
|
engine/mid_block.plan
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:040b2e3dddac9efd68e125b02056a5cd4f8df7259e7167f14e57db8b68e26fb8
|
| 3 |
+
size 830401652
|
engine/up_blocks.0.plan
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cbc51a0d7f00e4d8b108bfda46a25df62b41623e084733173730774e7e15048
|
| 3 |
+
size 2425023084
|
engine/up_blocks.1.plan
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bd8a45dd815e72cdf75dd5df10a651c892b09f80a292edaafd4d192be419be3
|
| 3 |
+
size 218672972
|
engine/up_blocks.2.plan
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ee5d0d133198b30624c3d682b0438fa4485eeade489e7ab21ed425e1d947549
|
| 3 |
+
size 24347780
|
loss_params.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e4c687fb455b7495e325d5f1761391d281323de6d2a493b153a3dac9536664e
|
| 3 |
+
size 3120
|
pyproject.toml
CHANGED
|
@@ -8,30 +8,21 @@ description = "An edge-maxxing model submission for the 4090 newdream contest"
|
|
| 8 |
requires-python = ">=3.10,<3.11"
|
| 9 |
version = "6"
|
| 10 |
dependencies = [
|
| 11 |
-
"
|
| 12 |
-
"
|
| 13 |
-
"onediffx==1.2.0",
|
| 14 |
-
"accelerate==0.31.0",
|
| 15 |
-
"numpy==1.26.4",
|
| 16 |
-
"xformers==0.0.25.post1",
|
| 17 |
-
"triton==2.2.0",
|
| 18 |
"transformers==4.41.2",
|
| 19 |
"accelerate==0.31.0",
|
| 20 |
"omegaconf==2.3.0",
|
| 21 |
-
"torch==2.
|
| 22 |
-
"
|
| 23 |
-
"
|
| 24 |
-
"
|
| 25 |
-
"
|
|
|
|
|
|
|
|
|
|
| 26 |
"setuptools>=75.2.0",
|
| 27 |
-
"bitsandbytes>=0.44.1",
|
| 28 |
-
"stable-fast",
|
| 29 |
-
"tomesd>=0.1.3",
|
| 30 |
]
|
| 31 |
|
| 32 |
-
[tool.uv.sources]
|
| 33 |
-
oneflow = { url = "https://github.com/siliconflow/oneflow_releases/releases/download/community_cu118/oneflow-0.9.1.dev20240802%2Bcu118-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }
|
| 34 |
-
stable-fast = { url = "https://github.com/chengzeyi/stable-fast/releases/download/v1.0.5/stable_fast-1.0.5+torch222cu121-cp310-cp310-manylinux2014_x86_64.whl" }
|
| 35 |
-
|
| 36 |
[project.scripts]
|
| 37 |
start_inference = "main:main"
|
|
|
|
| 8 |
requires-python = ">=3.10,<3.11"
|
| 9 |
version = "6"
|
| 10 |
dependencies = [
|
| 11 |
+
"wheel",
|
| 12 |
+
"diffusers==0.30.2",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"transformers==4.41.2",
|
| 14 |
"accelerate==0.31.0",
|
| 15 |
"omegaconf==2.3.0",
|
| 16 |
+
"torch==2.4.1",
|
| 17 |
+
"edge-maxxing-pipelines @ git+https://github.com/womboai/edge-maxxing@8d8ff45863416484b5b4bc547782591bbdfc696a#subdirectory=pipelines",
|
| 18 |
+
"polygraphy",
|
| 19 |
+
"onnx",
|
| 20 |
+
"tensorrt>=10.5.0",
|
| 21 |
+
"tensorrt-cu12-libs>=10.5.0",
|
| 22 |
+
"tensorrt-cu12-bindings>=10.5.0",
|
| 23 |
+
"cuda-python>=12.6.0",
|
| 24 |
"setuptools>=75.2.0",
|
|
|
|
|
|
|
|
|
|
| 25 |
]
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
[project.scripts]
|
| 28 |
start_inference = "main:main"
|
src/cache_diffusion/cachify.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
#
|
| 4 |
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
| 5 |
+
# copy of this software and associated documentation files (the "Software"),
|
| 6 |
+
# to deal in the Software without restriction, including without limitation
|
| 7 |
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 8 |
+
# and/or sell copies of the Software, and to permit persons to whom the
|
| 9 |
+
# Software is furnished to do so, subject to the following conditions:
|
| 10 |
+
#
|
| 11 |
+
# The above copyright notice and this permission notice shall be included in
|
| 12 |
+
# all copies or substantial portions of the Software.
|
| 13 |
+
#
|
| 14 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 17 |
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 19 |
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 20 |
+
# DEALINGS IN THE SOFTWARE.
|
| 21 |
+
|
| 22 |
+
import fnmatch
|
| 23 |
+
from contextlib import contextmanager
|
| 24 |
+
|
| 25 |
+
from diffusers.models.attention import BasicTransformerBlock, JointTransformerBlock
|
| 26 |
+
from diffusers.models.transformers.pixart_transformer_2d import PixArtTransformer2DModel
|
| 27 |
+
from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
|
| 28 |
+
from diffusers.models.unets.unet_2d_blocks import (
|
| 29 |
+
CrossAttnDownBlock2D,
|
| 30 |
+
CrossAttnUpBlock2D,
|
| 31 |
+
DownBlock2D,
|
| 32 |
+
UNetMidBlock2DCrossAttn,
|
| 33 |
+
UpBlock2D,
|
| 34 |
+
)
|
| 35 |
+
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
|
| 36 |
+
from diffusers.models.unets.unet_3d_blocks import (
|
| 37 |
+
CrossAttnDownBlockSpatioTemporal,
|
| 38 |
+
CrossAttnUpBlockSpatioTemporal,
|
| 39 |
+
DownBlockSpatioTemporal,
|
| 40 |
+
UNetMidBlockSpatioTemporal,
|
| 41 |
+
UpBlockSpatioTemporal,
|
| 42 |
+
)
|
| 43 |
+
from diffusers.models.unets.unet_spatio_temporal_condition import UNetSpatioTemporalConditionModel
|
| 44 |
+
|
| 45 |
+
from .module import CachedModule
|
| 46 |
+
from .utils import replace_module
|
| 47 |
+
|
| 48 |
+
# Maps each supported denoiser class to the tuple of block classes that may be
# wrapped in a CachedModule. Every value is a real tuple (note the trailing
# comma on single-element entries) so all entries share one shape and can be
# passed directly to isinstance().
CACHED_PIPE = {
    UNet2DConditionModel: (
        DownBlock2D,
        CrossAttnDownBlock2D,
        UNetMidBlock2DCrossAttn,
        CrossAttnUpBlock2D,
        UpBlock2D,
    ),
    PixArtTransformer2DModel: (BasicTransformerBlock,),
    UNetSpatioTemporalConditionModel: (
        CrossAttnDownBlockSpatioTemporal,
        DownBlockSpatioTemporal,
        UpBlockSpatioTemporal,
        CrossAttnUpBlockSpatioTemporal,
        UNetMidBlockSpatioTemporal,
    ),
    SD3Transformer2DModel: (JointTransformerBlock,),
}
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _apply_to_modules(model, action, modules=None, config_list=None):
    """Run ``action`` on wrapped modules and wrap the ones selected by config.

    Args:
        model: Object exposing either ``engines`` (used when its
            ``use_trt_infer`` attribute is truthy) or ``named_modules()``.
        action: Callable invoked with every ``CachedModule`` encountered.
        modules: Class or tuple of classes eligible for wrapping in the
            ``named_modules()`` branch; ignored in the ``engines`` branch.
        config_list: Iterable of dicts with the keys
            ``"wildcard_or_filter_func"`` and ``"select_cache_step_func"``.
    """
    if getattr(model, "use_trt_infer", False):
        for key, engine in model.engines.items():
            if isinstance(engine, CachedModule):
                action(engine)
                continue
            # Nothing to wrap with when no configs were supplied.
            for config in config_list or ():
                if _pass(key, config["wildcard_or_filter_func"]):
                    # Re-assigning an existing key keeps the dict size stable,
                    # so this is safe while iterating over items().
                    model.engines[key] = CachedModule(module=None, select_cache_step_func=None) if False else CachedModule(
                        engine, config["select_cache_step_func"]
                    )
        return

    for name, submodule in model.named_modules():
        if isinstance(submodule, CachedModule):
            action(submodule)
            continue
        if not (modules and config_list):
            continue
        for config in config_list:
            name_matches = _pass(name, config["wildcard_or_filter_func"])
            if name_matches and isinstance(submodule, modules):
                wrapped = CachedModule(submodule, config["select_cache_step_func"])
                replace_module(model, name, wrapped)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def cachify(model, config_list, modules):
    """Wrap the modules of ``model`` selected by ``config_list``.

    Args:
        model: Denoiser whose blocks should be wrapped.
        config_list: Caching configs forwarded to ``_apply_to_modules``.
        modules: Class or tuple of classes eligible for wrapping.
    """
    # The wrapping itself is done by _apply_to_modules; modules that are
    # already wrapped need no further work, hence the no-op action.
    _apply_to_modules(
        model,
        lambda _module: None,
        modules=modules,
        config_list=config_list,
    )
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def disable(pipe):
    """Call ``disable_cache()`` on every ``CachedModule`` of the pipeline's model."""

    def _turn_off(module):
        module.disable_cache()

    _apply_to_modules(get_model(pipe), _turn_off)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def enable(pipe):
    """Call ``enable_cache()`` on every ``CachedModule`` of the pipeline's model."""

    def _turn_on(module):
        module.enable_cache()

    _apply_to_modules(get_model(pipe), _turn_on)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def reset_status(pipe):
    """Set ``cur_step`` back to 0 on every ``CachedModule`` of the pipeline's model."""

    def _rewind(module):
        module.cur_step = 0

    _apply_to_modules(get_model(pipe), _rewind)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def _pass(name, wildcard_or_filter_func):
|
| 116 |
+
if isinstance(wildcard_or_filter_func, str):
|
| 117 |
+
return fnmatch.fnmatch(name, wildcard_or_filter_func)
|
| 118 |
+
elif callable(wildcard_or_filter_func):
|
| 119 |
+
return wildcard_or_filter_func(name)
|
| 120 |
+
else:
|
| 121 |
+
raise NotImplementedError(f"Unsupported type {type(wildcard_or_filter_func)}")
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def get_model(pipe):
    """Return the denoising model held by ``pipe``.

    ``pipe.unet`` takes precedence; ``pipe.transformer`` is the fallback.

    Raises:
        KeyError: If ``pipe`` has neither a ``unet`` nor a ``transformer``
            attribute.
    """
    for attr in ("unet", "transformer"):
        if hasattr(pipe, attr):
            return getattr(pipe, attr)
    # Same exception type as before, now with a message that names the cause.
    raise KeyError(
        f"{type(pipe).__name__} has neither a `unet` nor a `transformer` attribute"
    )
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
@contextmanager
def infer(pipe):
    """Context manager that yields ``pipe`` and resets cache step counters on exit.

    ``reset_status(pipe)`` runs in a ``finally`` clause, so it is executed
    whether the ``with`` body finishes normally or raises.
    """
    try:
        yield pipe
    finally:
        reset_status(pipe)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def prepare(pipe, config_list):
    """Wrap the cacheable blocks of the pipeline's model according to ``config_list``.

    Args:
        pipe: Pipeline exposing the model via ``unet`` or ``transformer``.
        config_list: Caching configs passed on to ``cachify``.

    Raises:
        AssertionError: If the model's class is not a key of ``CACHED_PIPE``.
    """
    model = get_model(pipe)
    model_cls = model.__class__
    assert model_cls in CACHED_PIPE, f"{model_cls} is not supported!"
    cachify(model, config_list, CACHED_PIPE[model_cls])
|
src/cache_diffusion/module.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
#
|
| 4 |
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
| 5 |
+
# copy of this software and associated documentation files (the "Software"),
|
| 6 |
+
# to deal in the Software without restriction, including without limitation
|
| 7 |
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 8 |
+
# and/or sell copies of the Software, and to permit persons to whom the
|
| 9 |
+
# Software is furnished to do so, subject to the following conditions:
|
| 10 |
+
#
|
| 11 |
+
# The above copyright notice and this permission notice shall be included in
|
| 12 |
+
# all copies or substantial portions of the Software.
|
| 13 |
+
#
|
| 14 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 17 |
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 19 |
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 20 |
+
# DEALINGS IN THE SOFTWARE.
|
| 21 |
+
|
| 22 |
+
from torch import nn
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class CachedModule(nn.Module):
    """Wrapper that replays the wrapped block's last output on selected steps.

    On every call the wrapper asks ``select_cache_step_func(cur_step)`` whether
    the previous result may be reused. If so (and caching is enabled and a
    result exists) the stored result is returned without running the block;
    otherwise the block is executed and its output stored.

    Args:
        block: The callable/module being wrapped.
        select_cache_step_func: Callable taking the current step index and
            returning a truthy value when the cached result should be reused.
    """

    def __init__(self, block, select_cache_step_func) -> None:
        super().__init__()
        self.block = block
        self.select_cache_step_func = select_cache_step_func
        self.cur_step = 0
        self.cached_results = None
        self.enabled = True

    def __getattr__(self, name):
        """Fall back to the wrapped block for attributes the wrapper lacks."""
        try:
            return super().__getattr__(name)
        except AttributeError:
            # If ``block`` itself is missing (e.g. the instance was created
            # without running __init__), delegating to ``self.block`` would
            # re-enter this method forever; surface the AttributeError instead.
            if name == "block":
                raise
            return getattr(self.block, name)

    def if_cache(self):
        """Return a truthy value when the current step should reuse the cache."""
        return self.select_cache_step_func(self.cur_step) and self.enabled

    def enable_cache(self):
        """Turn caching on."""
        self.enabled = True

    def disable_cache(self):
        """Turn caching off and rewind the step counter to 0."""
        self.enabled = False
        self.cur_step = 0

    def forward(self, *args, **kwargs):
        """Run the block or return its cached output, then advance the step.

        The step counter only advances while caching is enabled.
        """
        # Never "reuse" a result that does not exist yet: if the predicate
        # selects caching before the block has run, compute instead of
        # returning None.
        reuse = self.if_cache() and self.cached_results is not None
        if not reuse:
            self.cached_results = self.block(*args, **kwargs)
        if self.enabled:
            self.cur_step += 1
        return self.cached_results
|
src/cache_diffusion/utils.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
#
|
| 4 |
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
| 5 |
+
# copy of this software and associated documentation files (the "Software"),
|
| 6 |
+
# to deal in the Software without restriction, including without limitation
|
| 7 |
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 8 |
+
# and/or sell copies of the Software, and to permit persons to whom the
|
| 9 |
+
# Software is furnished to do so, subject to the following conditions:
|
| 10 |
+
#
|
| 11 |
+
# The above copyright notice and this permission notice shall be included in
|
| 12 |
+
# all copies or substantial portions of the Software.
|
| 13 |
+
#
|
| 14 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 17 |
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 19 |
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 20 |
+
# DEALINGS IN THE SOFTWARE.
|
| 21 |
+
|
| 22 |
+
import re
|
| 23 |
+
|
| 24 |
+
# Default caching configs. Each entry pairs a module-name filter
# ("wildcard_or_filter_func") with a step predicate ("select_cache_step_func").

# Names without "up_blocks.2"; predicate is true on odd steps.
SDXL_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: "up_blocks.2" not in name,
        "select_cache_step_func": lambda step: step % 2 != 0,
    },
]

# Names not matching transformer_blocks.21-27; predicate is true on steps
# that are not multiples of 3.
PIXART_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: not re.search(
            r"transformer_blocks\.(2[1-7])\.", name
        ),
        "select_cache_step_func": lambda step: step % 3 != 0,
    },
]

# Names without "up_blocks.3"; predicate is true on odd steps.
SVD_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: "up_blocks.3" not in name,
        "select_cache_step_func": lambda step: step % 2 != 0,
    },
]

# The filter returns a match object (truthy) for names that do not contain
# transformer_blocks.16-23, and None otherwise; predicate is true on odd steps.
SD3_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: re.search(
            r"^((?!transformer_blocks\.(1[6-9]|2[0-3])).)*$", name
        ),
        "select_cache_step_func": lambda step: step % 2 != 0,
    },
]
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def replace_module(parent, name_path, new_module):
    """Assign ``new_module`` at the dotted attribute path ``name_path``.

    Args:
        parent: Root object the path is resolved against.
        name_path: Dot-separated attribute path, e.g. ``"a.b.c"``.
        new_module: Value stored as the final attribute of the path.
    """
    *owner_path, leaf = name_path.split(".")
    owner = parent
    for attr in owner_path:
        owner = getattr(owner, attr)
    setattr(owner, leaf, new_module)
|
src/pipeline.py
CHANGED
|
@@ -1,982 +1,54 @@
|
|
| 1 |
import torch
|
| 2 |
-
from
|
|
|
|
|
|
|
| 3 |
from pipelines.models import TextToImageRequest
|
| 4 |
from torch import Generator
|
| 5 |
-
import
|
| 6 |
-
from
|
| 7 |
-
import inspect
|
| 8 |
-
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
| 9 |
from loss import SchedulerWrapper
|
| 10 |
-
from onediffx import compile_pipe,load_pipe
|
| 11 |
-
# Import necessary components
|
| 12 |
-
from transformers import (
|
| 13 |
-
CLIPImageProcessor,
|
| 14 |
-
CLIPTextModel,
|
| 15 |
-
CLIPTextModelWithProjection,
|
| 16 |
-
CLIPTokenizer,
|
| 17 |
-
CLIPVisionModelWithProjection,
|
| 18 |
-
)
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback
|
| 22 |
-
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
|
| 23 |
-
from diffusers.loaders import (
|
| 24 |
-
FromSingleFileMixin,
|
| 25 |
-
IPAdapterMixin,
|
| 26 |
-
StableDiffusionXLLoraLoaderMixin,
|
| 27 |
-
TextualInversionLoaderMixin,
|
| 28 |
-
)
|
| 29 |
-
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel
|
| 30 |
-
from diffusers.models.attention_processor import (
|
| 31 |
-
AttnProcessor2_0,
|
| 32 |
-
FusedAttnProcessor2_0,
|
| 33 |
-
XFormersAttnProcessor,
|
| 34 |
-
)
|
| 35 |
-
from diffusers.models.lora import adjust_lora_scale_text_encoder
|
| 36 |
-
from diffusers.schedulers import KarrasDiffusionSchedulers
|
| 37 |
-
from diffusers.utils import (
|
| 38 |
-
USE_PEFT_BACKEND,
|
| 39 |
-
deprecate,
|
| 40 |
-
is_invisible_watermark_available,
|
| 41 |
-
is_torch_xla_available,
|
| 42 |
-
logging,
|
| 43 |
-
replace_example_docstring,
|
| 44 |
-
scale_lora_layers,
|
| 45 |
-
unscale_lora_layers,
|
| 46 |
-
)
|
| 47 |
-
from diffusers.utils.torch_utils import randn_tensor
|
| 48 |
-
from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
| 49 |
-
from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
| 50 |
-
|
| 51 |
-
# Import watermark if available
|
| 52 |
-
if is_invisible_watermark_available():
|
| 53 |
-
from .watermark import StableDiffusionXLWatermarker
|
| 54 |
-
|
| 55 |
-
# Check for XLA availability
|
| 56 |
-
if is_torch_xla_available():
|
| 57 |
-
import torch_xla.core.xla_model as xm
|
| 58 |
-
XLA_AVAILABLE = True
|
| 59 |
-
else:
|
| 60 |
-
XLA_AVAILABLE = False
|
| 61 |
-
|
| 62 |
-
logger = logging.get_logger(__name__)
|
| 63 |
-
|
| 64 |
-
# Constants
|
| 65 |
-
EXAMPLE_DOC_STRING = """
|
| 66 |
-
Examples:
|
| 67 |
-
```py
|
| 68 |
-
>>> import torch
|
| 69 |
-
>>> from diffusers import StableDiffusionXLPipeline
|
| 70 |
-
|
| 71 |
-
>>> pipe = StableDiffusionXLPipeline.from_pretrained(
|
| 72 |
-
>>> "stabilityai/stable-diffusion-xl-base-1.0",
|
| 73 |
-
>>> torch_dtype=torch.float16
|
| 74 |
-
>>> )
|
| 75 |
-
>>> pipe = pipe.to("cuda")
|
| 76 |
-
|
| 77 |
-
>>> prompt = "a photo of an astronaut riding a horse on mars"
|
| 78 |
-
>>> image = pipe(prompt).images[0]
|
| 79 |
-
```
|
| 80 |
-
"""
|
| 81 |
-
|
| 82 |
-
# Helper functions
|
| 83 |
-
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
    """Blend ``noise_cfg`` with a copy rescaled to the std of ``noise_pred_text``.

    The standard deviation is taken over every dimension except the first.

    Args:
        noise_cfg: Tensor to rescale.
        noise_pred_text: Tensor supplying the target standard deviation.
        guidance_rescale: Blend weight; 0.0 returns values equal to
            ``noise_cfg``, 1.0 returns the fully rescaled tensor.
    """
    text_dims = list(range(1, noise_pred_text.ndim))
    cfg_dims = list(range(1, noise_cfg.ndim))
    std_text = noise_pred_text.std(dim=text_dims, keepdim=True)
    std_cfg = noise_cfg.std(dim=cfg_dims, keepdim=True)
    rescaled = noise_cfg * (std_text / std_cfg)
    return guidance_rescale * rescaled + (1 - guidance_rescale) * noise_cfg
|
| 90 |
-
|
| 91 |
-
# Utils functions
|
| 92 |
-
import numpy as np
|
| 93 |
-
def max_pixel_filter(image: Image) -> Image:
    """Return a new image in which every maximum-valued element is lowered by 2.

    This is best-effort: if any step fails with an ordinary exception the
    input image is returned unchanged.
    """
    try:
        img_array = np.array(image)
        max_val = img_array.max()
        # Subtract 2 from every element equal to the array maximum.
        # NOTE(review): with an unsigned dtype and max_val < 2 this
        # subtraction would wrap around — confirm expected inputs.
        img_array[img_array == max_val] -= 2
        return Image.fromarray(img_array)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        return image
|
| 106 |
-
|
| 107 |
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
|
| 108 |
-
def retrieve_timesteps(
    scheduler,
    num_inference_steps: Optional[int] = None,
    device: Optional[Union[str, torch.device]] = None,
    timesteps: Optional[List[int]] = None,
    sigmas: Optional[List[float]] = None,
    **kwargs,
):
    """Call ``scheduler.set_timesteps`` and return the resulting schedule.

    Args:
        scheduler: Object with a ``set_timesteps`` method and, after that
            call, a ``timesteps`` attribute.
        num_inference_steps: Step count used when neither ``timesteps`` nor
            ``sigmas`` is given.
        device: Forwarded to ``set_timesteps``.
        timesteps: Custom timesteps; mutually exclusive with ``sigmas``.
        sigmas: Custom sigmas; mutually exclusive with ``timesteps``.
        **kwargs: Forwarded to ``set_timesteps``.

    Returns:
        ``(scheduler.timesteps, num_inference_steps)``; with a custom schedule
        the second element is ``len(scheduler.timesteps)``.

    Raises:
        ValueError: If both ``timesteps`` and ``sigmas`` are given, or if
            ``set_timesteps`` does not accept the one that was given.
    """
    if timesteps is not None and sigmas is not None:
        raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")

    # Default schedule: the caller-supplied step count is returned untouched.
    if timesteps is None and sigmas is None:
        scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
        return scheduler.timesteps, num_inference_steps

    accepted = set(inspect.signature(scheduler.set_timesteps).parameters.keys())
    if timesteps is not None:
        if "timesteps" not in accepted:
            raise ValueError(
                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
                f" timestep schedules. Please check whether you are using the correct scheduler."
            )
        scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
    else:
        if "sigmas" not in accepted:
            raise ValueError(
                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
                f" sigmas schedules. Please check whether you are using the correct scheduler."
            )
        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)

    schedule = scheduler.timesteps
    return schedule, len(schedule)
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
class StableDiffusionXLPipeline_new(
|
| 145 |
-
DiffusionPipeline,
|
| 146 |
-
StableDiffusionMixin,
|
| 147 |
-
FromSingleFileMixin,
|
| 148 |
-
StableDiffusionXLLoraLoaderMixin,
|
| 149 |
-
TextualInversionLoaderMixin,
|
| 150 |
-
IPAdapterMixin,
|
| 151 |
-
):
|
| 152 |
-
|
| 153 |
-
model_cpu_offload_seq = "text_encoder->text_encoder_2->image_encoder->unet->vae"
|
| 154 |
-
_optional_components = [
|
| 155 |
-
"tokenizer",
|
| 156 |
-
"tokenizer_2",
|
| 157 |
-
"text_encoder",
|
| 158 |
-
"text_encoder_2",
|
| 159 |
-
"image_encoder",
|
| 160 |
-
"feature_extractor",
|
| 161 |
-
]
|
| 162 |
-
_callback_tensor_inputs = [
|
| 163 |
-
"latents",
|
| 164 |
-
"prompt_embeds",
|
| 165 |
-
"negative_prompt_embeds",
|
| 166 |
-
"add_text_embeds",
|
| 167 |
-
"add_time_ids",
|
| 168 |
-
"negative_pooled_prompt_embeds",
|
| 169 |
-
"negative_add_time_ids",
|
| 170 |
-
]
|
| 171 |
-
|
| 172 |
-
def __init__(
|
| 173 |
-
self,
|
| 174 |
-
vae: AutoencoderKL,
|
| 175 |
-
text_encoder: CLIPTextModel,
|
| 176 |
-
text_encoder_2: CLIPTextModelWithProjection,
|
| 177 |
-
tokenizer: CLIPTokenizer,
|
| 178 |
-
tokenizer_2: CLIPTokenizer,
|
| 179 |
-
unet: UNet2DConditionModel,
|
| 180 |
-
scheduler: KarrasDiffusionSchedulers,
|
| 181 |
-
image_encoder: CLIPVisionModelWithProjection = None,
|
| 182 |
-
feature_extractor: CLIPImageProcessor = None,
|
| 183 |
-
force_zeros_for_empty_prompt: bool = True,
|
| 184 |
-
add_watermarker: Optional[bool] = None,
|
| 185 |
-
):
|
| 186 |
-
super().__init__()
|
| 187 |
-
|
| 188 |
-
self.register_modules(
|
| 189 |
-
vae=vae,
|
| 190 |
-
text_encoder=text_encoder,
|
| 191 |
-
text_encoder_2=text_encoder_2,
|
| 192 |
-
tokenizer=tokenizer,
|
| 193 |
-
tokenizer_2=tokenizer_2,
|
| 194 |
-
unet=unet,
|
| 195 |
-
scheduler=scheduler,
|
| 196 |
-
image_encoder=image_encoder,
|
| 197 |
-
feature_extractor=feature_extractor,
|
| 198 |
-
)
|
| 199 |
-
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
|
| 200 |
-
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
| 201 |
-
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
| 202 |
-
|
| 203 |
-
self.default_sample_size = self.unet.config.sample_size
|
| 204 |
-
|
| 205 |
-
add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()
|
| 206 |
-
|
| 207 |
-
if add_watermarker:
|
| 208 |
-
self.watermark = StableDiffusionXLWatermarker()
|
| 209 |
-
else:
|
| 210 |
-
self.watermark = None
|
| 211 |
-
|
| 212 |
-
def encode_prompt(
|
| 213 |
-
self,
|
| 214 |
-
prompt: str,
|
| 215 |
-
prompt_2: Optional[str] = None,
|
| 216 |
-
device: Optional[torch.device] = None,
|
| 217 |
-
num_images_per_prompt: int = 1,
|
| 218 |
-
do_classifier_free_guidance: bool = True,
|
| 219 |
-
negative_prompt: Optional[str] = None,
|
| 220 |
-
negative_prompt_2: Optional[str] = None,
|
| 221 |
-
prompt_embeds: Optional[torch.Tensor] = None,
|
| 222 |
-
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
| 223 |
-
pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
| 224 |
-
negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
|
| 225 |
-
lora_scale: Optional[float] = None,
|
| 226 |
-
clip_skip: Optional[int] = None,
|
| 227 |
-
):
|
| 228 |
-
device = device or self._execution_device
|
| 229 |
-
|
| 230 |
-
# set lora scale so that monkey patched LoRA
|
| 231 |
-
# function of text encoder can correctly access it
|
| 232 |
-
if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin):
|
| 233 |
-
self._lora_scale = lora_scale
|
| 234 |
-
|
| 235 |
-
# dynamically adjust the LoRA scale
|
| 236 |
-
if self.text_encoder is not None:
|
| 237 |
-
if not USE_PEFT_BACKEND:
|
| 238 |
-
adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
|
| 239 |
-
else:
|
| 240 |
-
scale_lora_layers(self.text_encoder, lora_scale)
|
| 241 |
-
|
| 242 |
-
if self.text_encoder_2 is not None:
|
| 243 |
-
if not USE_PEFT_BACKEND:
|
| 244 |
-
adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
|
| 245 |
-
else:
|
| 246 |
-
scale_lora_layers(self.text_encoder_2, lora_scale)
|
| 247 |
-
|
| 248 |
-
prompt = [prompt] if isinstance(prompt, str) else prompt
|
| 249 |
-
|
| 250 |
-
if prompt is not None:
|
| 251 |
-
batch_size = len(prompt)
|
| 252 |
-
else:
|
| 253 |
-
batch_size = prompt_embeds.shape[0]
|
| 254 |
-
|
| 255 |
-
# Define tokenizers and text encoders
|
| 256 |
-
tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
|
| 257 |
-
text_encoders = (
|
| 258 |
-
[self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
|
| 259 |
-
)
|
| 260 |
-
|
| 261 |
-
if prompt_embeds is None:
|
| 262 |
-
prompt_2 = prompt_2 or prompt
|
| 263 |
-
prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2
|
| 264 |
-
|
| 265 |
-
# textual inversion: process multi-vector tokens if necessary
|
| 266 |
-
prompt_embeds_list = []
|
| 267 |
-
prompts = [prompt, prompt_2]
|
| 268 |
-
for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
|
| 269 |
-
if isinstance(self, TextualInversionLoaderMixin):
|
| 270 |
-
prompt = self.maybe_convert_prompt(prompt, tokenizer)
|
| 271 |
-
|
| 272 |
-
text_inputs = tokenizer(
|
| 273 |
-
prompt,
|
| 274 |
-
padding="max_length",
|
| 275 |
-
max_length=tokenizer.model_max_length,
|
| 276 |
-
truncation=True,
|
| 277 |
-
return_tensors="pt",
|
| 278 |
-
)
|
| 279 |
-
|
| 280 |
-
text_input_ids = text_inputs.input_ids
|
| 281 |
-
untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
|
| 282 |
-
|
| 283 |
-
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
|
| 284 |
-
text_input_ids, untruncated_ids
|
| 285 |
-
):
|
| 286 |
-
removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1])
|
| 287 |
-
logger.warning(
|
| 288 |
-
"The following part of your input was truncated because CLIP can only handle sequences up to"
|
| 289 |
-
f" {tokenizer.model_max_length} tokens: {removed_text}"
|
| 290 |
-
)
|
| 291 |
-
|
| 292 |
-
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
|
| 293 |
-
|
| 294 |
-
# We are only ALWAYS interested in the pooled output of the final text encoder
|
| 295 |
-
pooled_prompt_embeds = prompt_embeds[0]
|
| 296 |
-
if clip_skip is None:
|
| 297 |
-
prompt_embeds = prompt_embeds.hidden_states[-2]
|
| 298 |
-
else:
|
| 299 |
-
# "2" because SDXL always indexes from the penultimate layer.
|
| 300 |
-
prompt_embeds = prompt_embeds.hidden_states[-(clip_skip + 2)]
|
| 301 |
-
|
| 302 |
-
prompt_embeds_list.append(prompt_embeds)
|
| 303 |
-
|
| 304 |
-
prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)
|
| 305 |
-
|
| 306 |
-
# get unconditional embeddings for classifier free guidance
|
| 307 |
-
zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
|
| 308 |
-
if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
|
| 309 |
-
negative_prompt_embeds = torch.zeros_like(prompt_embeds)
|
| 310 |
-
negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
|
| 311 |
-
elif do_classifier_free_guidance and negative_prompt_embeds is None:
|
| 312 |
-
negative_prompt = negative_prompt or ""
|
| 313 |
-
negative_prompt_2 = negative_prompt_2 or negative_prompt
|
| 314 |
-
|
| 315 |
-
# normalize str to list
|
| 316 |
-
negative_prompt = batch_size * [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt
|
| 317 |
-
negative_prompt_2 = (
|
| 318 |
-
batch_size * [negative_prompt_2] if isinstance(negative_prompt_2, str) else negative_prompt_2
|
| 319 |
-
)
|
| 320 |
-
|
| 321 |
-
uncond_tokens: List[str]
|
| 322 |
-
if prompt is not None and type(prompt) is not type(negative_prompt):
|
| 323 |
-
raise TypeError(
|
| 324 |
-
f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
|
| 325 |
-
f" {type(prompt)}."
|
| 326 |
-
)
|
| 327 |
-
elif batch_size != len(negative_prompt):
|
| 328 |
-
raise ValueError(
|
| 329 |
-
f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
|
| 330 |
-
f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
|
| 331 |
-
" the batch size of `prompt`."
|
| 332 |
-
)
|
| 333 |
-
else:
|
| 334 |
-
uncond_tokens = [negative_prompt, negative_prompt_2]
|
| 335 |
-
|
| 336 |
-
negative_prompt_embeds_list = []
|
| 337 |
-
for negative_prompt, tokenizer, text_encoder in zip(uncond_tokens, tokenizers, text_encoders):
|
| 338 |
-
if isinstance(self, TextualInversionLoaderMixin):
|
| 339 |
-
negative_prompt = self.maybe_convert_prompt(negative_prompt, tokenizer)
|
| 340 |
-
|
| 341 |
-
max_length = prompt_embeds.shape[1]
|
| 342 |
-
uncond_input = tokenizer(
|
| 343 |
-
negative_prompt,
|
| 344 |
-
padding="max_length",
|
| 345 |
-
max_length=max_length,
|
| 346 |
-
truncation=True,
|
| 347 |
-
return_tensors="pt",
|
| 348 |
-
)
|
| 349 |
-
|
| 350 |
-
negative_prompt_embeds = text_encoder(
|
| 351 |
-
uncond_input.input_ids.to(device),
|
| 352 |
-
output_hidden_states=True,
|
| 353 |
-
)
|
| 354 |
-
# We are only ALWAYS interested in the pooled output of the final text encoder
|
| 355 |
-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
|
| 356 |
-
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
|
| 357 |
-
|
| 358 |
-
negative_prompt_embeds_list.append(negative_prompt_embeds)
|
| 359 |
-
|
| 360 |
-
negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
|
| 361 |
-
|
| 362 |
-
if self.text_encoder_2 is not None:
|
| 363 |
-
prompt_embeds = prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
|
| 364 |
-
else:
|
| 365 |
-
prompt_embeds = prompt_embeds.to(dtype=self.unet.dtype, device=device)
|
| 366 |
-
|
| 367 |
-
bs_embed, seq_len, _ = prompt_embeds.shape
|
| 368 |
-
# duplicate text embeddings for each generation per prompt, using mps friendly method
|
| 369 |
-
prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
|
| 370 |
-
prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)
|
| 371 |
-
|
| 372 |
-
if do_classifier_free_guidance:
|
| 373 |
-
# duplicate unconditional embeddings for each generation per prompt, using mps friendly method
|
| 374 |
-
seq_len = negative_prompt_embeds.shape[1]
|
| 375 |
-
|
| 376 |
-
if self.text_encoder_2 is not None:
|
| 377 |
-
negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
|
| 378 |
-
else:
|
| 379 |
-
negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.unet.dtype, device=device)
|
| 380 |
-
|
| 381 |
-
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
|
| 382 |
-
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
|
| 383 |
-
|
| 384 |
-
pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
|
| 385 |
-
bs_embed * num_images_per_prompt, -1
|
| 386 |
-
)
|
| 387 |
-
if do_classifier_free_guidance:
|
| 388 |
-
negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
|
| 389 |
-
bs_embed * num_images_per_prompt, -1
|
| 390 |
-
)
|
| 391 |
-
|
| 392 |
-
if self.text_encoder is not None:
|
| 393 |
-
if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
|
| 394 |
-
# Retrieve the original scale by scaling back the LoRA layers
|
| 395 |
-
unscale_lora_layers(self.text_encoder, lora_scale)
|
| 396 |
-
|
| 397 |
-
if self.text_encoder_2 is not None:
|
| 398 |
-
if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
|
| 399 |
-
# Retrieve the original scale by scaling back the LoRA layers
|
| 400 |
-
unscale_lora_layers(self.text_encoder_2, lora_scale)
|
| 401 |
-
|
| 402 |
-
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
|
| 403 |
-
|
| 404 |
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
|
| 405 |
-
def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
    """
    Encode an image with `self.image_encoder` and build the matching unconditional encoding.

    Args:
        image:
            A `torch.Tensor`, or any input accepted by `self.feature_extractor` (non-tensor inputs are converted
            to pixel values first).
        device:
            Device the pixel values are moved to before encoding.
        num_images_per_prompt (`int`):
            Number of times each encoded row is repeated along dim 0 (`repeat_interleave`).
        output_hidden_states (`bool`, *optional*):
            When truthy, the second-to-last hidden state of the encoder is returned instead of `image_embeds`.

    Returns:
        `tuple`: `(conditional, unconditional)`. With hidden states requested, the unconditional entry is the
        encoding of an all-zero image; otherwise it is a zero tensor shaped like the conditional embeddings.
    """
    encoder = self.image_encoder
    encoder_dtype = next(encoder.parameters()).dtype

    if not isinstance(image, torch.Tensor):
        image = self.feature_extractor(image, return_tensors="pt").pixel_values
    image = image.to(device=device, dtype=encoder_dtype)

    if not output_hidden_states:
        cond_embeds = encoder(image).image_embeds
        cond_embeds = cond_embeds.repeat_interleave(num_images_per_prompt, dim=0)
        return cond_embeds, torch.zeros_like(cond_embeds)

    def _penultimate_hidden(pixels):
        # Second-to-last hidden state, repeated once per requested image.
        hidden = encoder(pixels, output_hidden_states=True).hidden_states[-2]
        return hidden.repeat_interleave(num_images_per_prompt, dim=0)

    cond_hidden = _penultimate_hidden(image)
    uncond_hidden = _penultimate_hidden(torch.zeros_like(image))
    return cond_hidden, uncond_hidden
|
| 428 |
-
|
| 429 |
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_ip_adapter_image_embeds
|
| 430 |
-
def prepare_ip_adapter_image_embeds(
    self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
):
    """
    Build the per-adapter image embeddings passed to the UNet.

    Args:
        ip_adapter_image:
            Image (or list of images, one per IP Adapter) to encode. Only used when `ip_adapter_image_embeds` is
            `None`.
        ip_adapter_image_embeds:
            Optional list of precomputed embeddings. With classifier-free guidance each entry is split in two
            along dim 0 and read as `(negative, positive)`.
        device:
            Device the resulting tensors are moved to.
        num_images_per_prompt (`int`):
            Number of times each embedding is concatenated with itself along dim 0.
        do_classifier_free_guidance (`bool`):
            Whether negative embeddings are tracked and prepended to the positive ones.

    Returns:
        `list`: One tensor per adapter; with guidance enabled each is `cat([negative, positive], dim=0)`.

    Raises:
        ValueError: If the number of images differs from the number of image projection layers.
    """
    positive_embeds = []
    negative_embeds = []  # only filled and read when guidance is enabled

    if ip_adapter_image_embeds is not None:
        for packed in ip_adapter_image_embeds:
            if do_classifier_free_guidance:
                negative_half, packed = packed.chunk(2)
                negative_embeds.append(negative_half)
            positive_embeds.append(packed)
    else:
        if not isinstance(ip_adapter_image, list):
            ip_adapter_image = [ip_adapter_image]

        projection_layers = self.unet.encoder_hid_proj.image_projection_layers
        if len(ip_adapter_image) != len(projection_layers):
            raise ValueError(
                f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(projection_layers)} IP Adapters."
            )

        for adapter_image, projection_layer in zip(ip_adapter_image, projection_layers):
            # Only plain ImageProjection layers consume pooled embeds; others get hidden states.
            wants_hidden_states = not isinstance(projection_layer, ImageProjection)
            cond, uncond = self.encode_image(adapter_image, device, 1, wants_hidden_states)
            positive_embeds.append(cond[None, :])
            if do_classifier_free_guidance:
                negative_embeds.append(uncond[None, :])

    prepared = []
    for index, cond in enumerate(positive_embeds):
        tiled = torch.cat([cond] * num_images_per_prompt, dim=0)
        if do_classifier_free_guidance:
            tiled_negative = torch.cat([negative_embeds[index]] * num_images_per_prompt, dim=0)
            tiled = torch.cat([tiled_negative, tiled], dim=0)
        prepared.append(tiled.to(device=device))

    return prepared
|
| 474 |
-
|
| 475 |
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
|
| 476 |
-
def prepare_extra_step_kwargs(self, generator, eta):
    """
    Collect the optional keyword arguments that `self.scheduler.step` accepts.

    Not every scheduler has the same `step` signature, so `eta` and `generator` are forwarded only when the
    signature names them. eta (η) corresponds to η in the DDIM paper (https://arxiv.org/abs/2010.02502) and
    should be between [0, 1].

    Returns:
        `dict`: Contains `"eta"` and/or `"generator"` when the scheduler's `step` has those parameters.
    """
    step_parameters = inspect.signature(self.scheduler.step).parameters
    candidates = (("eta", eta), ("generator", generator))
    return {name: value for name, value in candidates if name in step_parameters}
|
| 492 |
-
|
| 493 |
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
| 494 |
-
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
    """
    Return the initial latents, scaled by the scheduler's `init_noise_sigma`.

    Args:
        batch_size (`int`): Effective batch size; must equal `len(generator)` when a list of generators is given.
        num_channels_latents (`int`): Channel dimension of the latent tensor.
        height, width: Image size in pixels; each is integer-divided by `self.vae_scale_factor`.
        dtype, device: Used when sampling fresh noise; supplied `latents` are only moved to `device`.
        generator: Generator or list of generators forwarded to `randn_tensor`.
        latents (`torch.Tensor`, *optional*): Pre-made latents to use instead of sampling.

    Raises:
        ValueError: If `generator` is a list whose length differs from `batch_size`.
    """
    scale = self.vae_scale_factor
    latent_shape = (batch_size, num_channels_latents, int(height) // scale, int(width) // scale)

    generator_count_mismatch = isinstance(generator, list) and len(generator) != batch_size
    if generator_count_mismatch:
        raise ValueError(
            f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
            f" size of {batch_size}. Make sure the batch size matches the length of the generators."
        )

    if latents is not None:
        latents = latents.to(device)
    else:
        latents = randn_tensor(latent_shape, generator=generator, device=device, dtype=dtype)

    # scale the initial noise by the standard deviation required by the scheduler
    return latents * self.scheduler.init_noise_sigma
|
| 515 |
-
|
| 516 |
-
def _get_add_time_ids(
    self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None
):
    """
    Pack the size/crop conditioning values into a `(1, N)` tensor after validating the embedding width.

    Args:
        original_size, crops_coords_top_left, target_size (`tuple`):
            Concatenated, in this order, to form the added time ids.
        dtype (`torch.dtype`): dtype of the returned tensor.
        text_encoder_projection_dim (`int`):
            Added to `addition_time_embed_dim * len(ids)` to get the width the UNet's `add_embedding.linear_1`
            is expected to receive.

    Raises:
        ValueError: If that computed width differs from `self.unet.add_embedding.linear_1.in_features`.
    """
    time_id_values = list(original_size + crops_coords_top_left + target_size)

    per_id_dim = self.unet.config.addition_time_embed_dim
    passed_dim = per_id_dim * len(time_id_values) + text_encoder_projection_dim
    expected_dim = self.unet.add_embedding.linear_1.in_features

    if expected_dim != passed_dim:
        raise ValueError(
            f"Model expects an added time embedding vector of length {expected_dim}, but a vector of {passed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
        )

    return torch.tensor([time_id_values], dtype=dtype)
|
| 533 |
-
|
| 534 |
-
def upcast_vae(self):
    """
    Move the VAE to float32, keeping a few sub-modules in the previous dtype when that is safe.

    If the first attention processor of the decoder's mid block is one of `AttnProcessor2_0`,
    `XFormersAttnProcessor` or `FusedAttnProcessor2_0`, then `post_quant_conv`, `decoder.conv_in` and
    `decoder.mid_block` are cast back to the dtype the VAE had on entry.
    """
    previous_dtype = self.vae.dtype
    self.vae.to(dtype=torch.float32)

    mid_block_processor = self.vae.decoder.mid_block.attentions[0].processor
    memory_efficient_processors = (
        AttnProcessor2_0,
        XFormersAttnProcessor,
        FusedAttnProcessor2_0,
    )
    if not isinstance(mid_block_processor, memory_efficient_processors):
        return

    # if xformers or torch_2_0 is used attention block does not need
    # to be in float32 which can save lots of memory
    self.vae.post_quant_conv.to(previous_dtype)
    self.vae.decoder.conv_in.to(previous_dtype)
    self.vae.decoder.mid_block.to(previous_dtype)
|
| 551 |
-
|
| 552 |
-
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
| 553 |
-
def get_guidance_scale_embedding(
    self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
) -> torch.Tensor:
    """
    Sinusoidal embedding of guidance-scale values.

    See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

    Args:
        w (`torch.Tensor`):
            1-D tensor of guidance scales; each value is multiplied by 1000 before being embedded.
        embedding_dim (`int`, *optional*, defaults to 512):
            Width of each embedding vector. Odd widths get one trailing zero column.
        dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
            Data type of the generated embeddings.

    Returns:
        `torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`; sine terms come first, then
        cosine terms.
    """
    assert w.ndim == 1
    scaled_w = w * 1000.0

    half_dim = embedding_dim // 2
    log_step = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
    frequencies = torch.exp(torch.arange(half_dim, dtype=dtype) * -log_step)
    angles = scaled_w.to(dtype)[:, None] * frequencies[None, :]

    embedding = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)
    if embedding_dim % 2 == 1:  # zero pad
        embedding = torch.nn.functional.pad(embedding, (0, 1))

    assert embedding.shape == (w.shape[0], embedding_dim)
    return embedding
|
| 582 |
-
|
| 583 |
-
@property
def guidance_scale(self):
    """Guidance scale stored on the instance as `_guidance_scale`."""
    current_scale = self._guidance_scale
    return current_scale
|
| 586 |
-
|
| 587 |
-
@property
def guidance_rescale(self):
    """Guidance rescale factor stored on the instance as `_guidance_rescale`."""
    current_rescale = self._guidance_rescale
    return current_rescale
|
| 590 |
-
|
| 591 |
-
@property
def clip_skip(self):
    """`clip_skip` value stored on the instance as `_clip_skip`."""
    current_clip_skip = self._clip_skip
    return current_clip_skip
|
| 594 |
-
|
| 595 |
-
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
| 596 |
-
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
| 597 |
-
# corresponds to doing no classifier free guidance.
|
| 598 |
-
@property
def do_classifier_free_guidance(self):
    """Whether guidance is active: `_guidance_scale > 1` and the UNet has no `time_cond_proj_dim`."""
    scale_enables_guidance = self._guidance_scale > 1
    return scale_enables_guidance and self.unet.config.time_cond_proj_dim is None
|
| 601 |
-
|
| 602 |
-
@property
def cross_attention_kwargs(self):
    """Cross-attention kwargs stored on the instance as `_cross_attention_kwargs`."""
    stored_kwargs = self._cross_attention_kwargs
    return stored_kwargs
|
| 605 |
-
|
| 606 |
-
@property
def denoising_end(self):
    """`denoising_end` value stored on the instance as `_denoising_end`."""
    stored_end = self._denoising_end
    return stored_end
|
| 609 |
-
|
| 610 |
-
@property
def num_timesteps(self):
    """Timestep count stored on the instance as `_num_timesteps`."""
    stored_count = self._num_timesteps
    return stored_count
|
| 613 |
-
|
| 614 |
-
@property
def interrupt(self):
    """Interrupt flag stored on the instance as `_interrupt`."""
    stored_flag = self._interrupt
    return stored_flag
|
| 617 |
-
|
| 618 |
-
@torch.no_grad()
def __call__(
    self,
    prompt: Union[str, List[str]] = None,
    prompt_2: Optional[Union[str, List[str]]] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
    num_inference_steps: int = 50,
    timesteps: List[int] = None,
    sigmas: List[float] = None,
    denoising_end: Optional[float] = None,
    guidance_scale: float = 5.0,
    negative_prompt: Optional[Union[str, List[str]]] = None,
    negative_prompt_2: Optional[Union[str, List[str]]] = None,
    num_images_per_prompt: Optional[int] = 1,
    eta: float = 0.0,
    generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
    latents: Optional[torch.Tensor] = None,
    prompt_embeds: Optional[torch.Tensor] = None,
    negative_prompt_embeds: Optional[torch.Tensor] = None,
    pooled_prompt_embeds: Optional[torch.Tensor] = None,
    negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
    ip_adapter_image: Optional[PipelineImageInput] = None,
    ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
    output_type: Optional[str] = "pil",
    return_dict: bool = True,
    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
    guidance_rescale: float = 0.0,
    end_cfg: float = 0.9,
    original_size: Optional[Tuple[int, int]] = None,
    crops_coords_top_left: Tuple[int, int] = (0, 0),
    target_size: Optional[Tuple[int, int]] = None,
    negative_original_size: Optional[Tuple[int, int]] = None,
    negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
    negative_target_size: Optional[Tuple[int, int]] = None,
    clip_skip: Optional[int] = None,
    callback_on_step_end: Optional[
        Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
    ] = None,
    callback_on_step_end_tensor_inputs: List[str] = ["latents"],
    **kwargs,
):
    """
    Run text-to-image generation with an early cut-off of classifier-free guidance.

    The flow is: encode prompts, prepare timesteps/latents/added conditioning, run the denoising loop, then
    (unless `output_type == "latent"`) decode with the VAE, watermark and post-process.

    Args:
        prompt, prompt_2, negative_prompt, negative_prompt_2:
            Forwarded to `self.encode_prompt`. `prompt` also determines the batch size (1 for a string,
            `len(prompt)` for a list); otherwise the batch size is `prompt_embeds.shape[0]`.
        height, width (`int`, *optional*):
            Default to `self.default_sample_size * self.vae_scale_factor`.
        num_inference_steps, timesteps, sigmas:
            Forwarded to `retrieve_timesteps`.
        denoising_end (`float`, *optional*):
            When a float strictly between 0 and 1, timesteps below the corresponding cutoff are dropped.
        guidance_scale (`float`, defaults to 5.0):
            Weight of the guided noise prediction; see `do_classifier_free_guidance`.
        num_images_per_prompt (`int`, defaults to 1):
            Multiplies the effective batch size.
        eta, generator:
            Passed to the scheduler step when its signature accepts them; `generator` is also used for latents.
        latents, prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds:
            Optional precomputed tensors used instead of generating them.
        ip_adapter_image, ip_adapter_image_embeds:
            Optional IP-Adapter inputs, prepared via `prepare_ip_adapter_image_embeds`.
        output_type (`str`, defaults to `"pil"`):
            `"latent"` returns the raw latents; anything else is handed to the image processor.
        return_dict (`bool`, defaults to `True`):
            Return a `StableDiffusionXLPipelineOutput` rather than a 1-tuple.
        cross_attention_kwargs (`dict`, *optional*):
            Forwarded to the UNet; its `"scale"` entry is used as the LoRA scale for prompt encoding.
        guidance_rescale (`float`, defaults to 0.0):
            When > 0 and guidance is active, `rescale_noise_cfg` is applied to the guided prediction.
        end_cfg (`float`, defaults to 0.9):
            Once `i / num_inference_steps` exceeds this value, guidance is switched off for the remaining
            steps and only the conditional half of the conditioning tensors is kept. `None` disables this.
        original_size, crops_coords_top_left, target_size, negative_original_size,
        negative_crops_coords_top_left, negative_target_size:
            Size/crop conditioning packed by `_get_add_time_ids`.
        clip_skip (`int`, *optional*):
            Forwarded to `self.encode_prompt`.
        callback_on_step_end, callback_on_step_end_tensor_inputs:
            Per-step callback and the names of the local tensors handed to it. The callback may return
            replacements for those tensors.
        **kwargs:
            Only the deprecated `callback` and `callback_steps` entries are read.

    Returns:
        `StableDiffusionXLPipelineOutput` with `images` set, or `(image,)` when `return_dict` is `False`.
    """
    callback = kwargs.pop("callback", None)
    callback_steps = kwargs.pop("callback_steps", None)

    if callback is not None:
        deprecate(
            "callback",
            "1.0.0",
            "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
        )
    if callback_steps is not None:
        deprecate(
            "callback_steps",
            "1.0.0",
            "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
        )

    if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
        callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs

    # 0. Default height and width to unet
    height = height or self.default_sample_size * self.vae_scale_factor
    width = width or self.default_sample_size * self.vae_scale_factor

    original_size = original_size or (height, width)
    target_size = target_size or (height, width)

    # Stash the call settings so the read-only properties reflect this invocation.
    self._guidance_scale = guidance_scale
    self._guidance_rescale = guidance_rescale
    self._clip_skip = clip_skip
    self._cross_attention_kwargs = cross_attention_kwargs
    self._denoising_end = denoising_end
    self._interrupt = False

    # 2. Define call parameters
    if prompt is not None and isinstance(prompt, str):
        batch_size = 1
    elif prompt is not None and isinstance(prompt, list):
        batch_size = len(prompt)
    else:
        batch_size = prompt_embeds.shape[0]

    device = self._execution_device

    # Whether any IP-Adapter input was supplied (and therefore `image_embeds` exists below).
    uses_ip_adapter = ip_adapter_image is not None or ip_adapter_image_embeds is not None

    # 3. Encode input prompt
    lora_scale = (
        self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
    )

    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = self.encode_prompt(
        prompt=prompt,
        prompt_2=prompt_2,
        device=device,
        num_images_per_prompt=num_images_per_prompt,
        do_classifier_free_guidance=self.do_classifier_free_guidance,
        negative_prompt=negative_prompt,
        negative_prompt_2=negative_prompt_2,
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
        lora_scale=lora_scale,
        clip_skip=self.clip_skip,
    )

    # 4. Prepare timesteps
    timesteps, num_inference_steps = retrieve_timesteps(
        self.scheduler, num_inference_steps, device, timesteps, sigmas
    )

    # 5. Prepare latent variables
    num_channels_latents = self.unet.config.in_channels
    latents = self.prepare_latents(
        batch_size * num_images_per_prompt,
        num_channels_latents,
        height,
        width,
        prompt_embeds.dtype,
        device,
        generator,
        latents,
    )

    # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
    extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

    # 7. Prepare added time ids & embeddings
    add_text_embeds = pooled_prompt_embeds
    if self.text_encoder_2 is None:
        text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
    else:
        text_encoder_projection_dim = self.text_encoder_2.config.projection_dim

    add_time_ids = self._get_add_time_ids(
        original_size,
        crops_coords_top_left,
        target_size,
        dtype=prompt_embeds.dtype,
        text_encoder_projection_dim=text_encoder_projection_dim,
    )
    if negative_original_size is not None and negative_target_size is not None:
        negative_add_time_ids = self._get_add_time_ids(
            negative_original_size,
            negative_crops_coords_top_left,
            negative_target_size,
            dtype=prompt_embeds.dtype,
            text_encoder_projection_dim=text_encoder_projection_dim,
        )
    else:
        negative_add_time_ids = add_time_ids

    if self.do_classifier_free_guidance:
        # Negative (unconditional) half first, conditional half second.
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

    prompt_embeds = prompt_embeds.to(device)
    add_text_embeds = add_text_embeds.to(device)
    add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)

    if uses_ip_adapter:
        image_embeds = self.prepare_ip_adapter_image_embeds(
            ip_adapter_image,
            ip_adapter_image_embeds,
            device,
            batch_size * num_images_per_prompt,
            self.do_classifier_free_guidance,
        )

    # 8. Denoising loop
    num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)

    # 8.1 Apply denoising_end
    if (
        self.denoising_end is not None
        and isinstance(self.denoising_end, float)
        and self.denoising_end > 0
        and self.denoising_end < 1
    ):
        discrete_timestep_cutoff = int(
            round(
                self.scheduler.config.num_train_timesteps
                - (self.denoising_end * self.scheduler.config.num_train_timesteps)
            )
        )
        num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
        timesteps = timesteps[:num_inference_steps]

    # 9. Optionally get Guidance Scale Embedding
    timestep_cond = None
    if self.unet.config.time_cond_proj_dim is not None:
        guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
        timestep_cond = self.get_guidance_scale_embedding(
            guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
        ).to(device=device, dtype=latents.dtype)

    self._num_timesteps = len(timesteps)
    with self.progress_bar(total=num_inference_steps) as progress_bar:
        # Local copy: guidance may be switched off part-way through the loop (see `end_cfg`).
        do_classifier_free_guidance = self.do_classifier_free_guidance
        for i, t in enumerate(timesteps):
            if self.interrupt:
                continue
            if end_cfg is not None and i / num_inference_steps > end_cfg and do_classifier_free_guidance:
                # Past the cut-off: stop guidance and keep only the conditional (second) half of every
                # conditioning tensor. The scale factors are kept exactly as in the original code.
                do_classifier_free_guidance = False
                prompt_embeds = 1.5*torch.chunk(prompt_embeds, 2, dim=0)[-1]
                add_text_embeds = 1.5*torch.chunk(add_text_embeds, 2, dim=0)[-1]
                add_time_ids = 1.25*torch.chunk(add_time_ids, 2, dim=0)[-1]
                if uses_ip_adapter:
                    # The IP-Adapter embeds were built as [negative, positive] too; without this their
                    # batch would no longer match the un-duplicated latents.
                    image_embeds = [torch.chunk(embeds, 2, dim=0)[-1] for embeds in image_embeds]
            # expand the latents if we are doing classifier free guidance
            latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents

            latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

            # predict the noise residual
            added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
            if uses_ip_adapter:
                added_cond_kwargs["image_embeds"] = image_embeds
            noise_pred = self.unet(
                latent_model_input,
                t,
                encoder_hidden_states=prompt_embeds,
                timestep_cond=timestep_cond,
                cross_attention_kwargs=self.cross_attention_kwargs,
                added_cond_kwargs=added_cond_kwargs,
                return_dict=False,
            )[0]

            # perform guidance
            if do_classifier_free_guidance:
                noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)

            if do_classifier_free_guidance and self.guidance_rescale > 0.0:
                # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
                noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)

            # compute the previous noisy sample x_t -> x_t-1
            latents_dtype = latents.dtype
            latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
            if latents.dtype != latents_dtype:
                if torch.backends.mps.is_available():
                    # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
                    latents = latents.to(latents_dtype)

            if callback_on_step_end is not None:
                callback_kwargs = {}
                for k in callback_on_step_end_tensor_inputs:
                    # Looked up by name, so the local variable names in this method are part of the contract.
                    callback_kwargs[k] = locals()[k]
                callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)

                latents = callback_outputs.pop("latents", latents)
                prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
                add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds)
                negative_pooled_prompt_embeds = callback_outputs.pop(
                    "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds
                )
                add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids)
                negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids)

            # call the callback, if provided
            if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                progress_bar.update()
                if callback is not None and i % callback_steps == 0:
                    step_idx = i // getattr(self.scheduler, "order", 1)
                    callback(step_idx, t, latents)

            if XLA_AVAILABLE:
                xm.mark_step()

    if not output_type == "latent":
        # make sure the VAE is in float32 mode, as it overflows in float16
        needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast

        if needs_upcasting:
            self.upcast_vae()
            latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
        elif latents.dtype != self.vae.dtype:
            if torch.backends.mps.is_available():
                # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
                self.vae = self.vae.to(latents.dtype)

        # unscale/denormalize the latents
        # denormalize with the mean and std if available and not None
        has_latents_mean = hasattr(self.vae.config, "latents_mean") and self.vae.config.latents_mean is not None
        has_latents_std = hasattr(self.vae.config, "latents_std") and self.vae.config.latents_std is not None
        if has_latents_mean and has_latents_std:
            latents_mean = (
                torch.tensor(self.vae.config.latents_mean).view(1, 4, 1, 1).to(latents.device, latents.dtype)
            )
            latents_std = (
                torch.tensor(self.vae.config.latents_std).view(1, 4, 1, 1).to(latents.device, latents.dtype)
            )
            latents = latents * latents_std / self.vae.config.scaling_factor + latents_mean
        else:
            latents = latents / self.vae.config.scaling_factor

        image = self.vae.decode(latents, return_dict=False)[0]

        # cast back to fp16 if needed
        if needs_upcasting:
            self.vae.to(dtype=torch.float16)
    else:
        image = latents

    if not output_type == "latent":
        # apply watermark if available
        if self.watermark is not None:
            image = self.watermark.apply_watermark(image)

        image = self.image_processor.postprocess(image, output_type=output_type)

    # Offload all models
    self.maybe_free_model_hooks()

    if not return_dict:
        return (image,)

    return StableDiffusionXLPipelineOutput(images=image)
|
| 942 |
-
|
| 943 |
-
def load_pipeline(pipeline=None) -> StableDiffusionXLPipeline:
    """Load the pipeline if none is given, then wrap its scheduler, compile it and warm it up."""
    if not pipeline:
        loaded = StableDiffusionXLPipeline_new.from_pretrained(
            "./models/newdream-sdxl-20",
            torch_dtype=torch.float16,
            local_files_only=True,
        )
        pipeline = loaded.to("cuda")

    # Rebuild a DDIM scheduler from the existing scheduler config and wrap it.
    ddim_scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
    pipeline.scheduler = SchedulerWrapper(ddim_scheduler)
    pipeline = compile_pipe(pipeline)
    load_pipe(pipeline, dir="cached_pipe")

    # Warm-up runs
    warmup_kwargs = {"prompt": "a cat and a dog", "num_inference_steps": 18}
    for _ in range(4):
        pipeline(**warmup_kwargs)

    pipeline.scheduler.prepare_loss()
    return pipeline
|
| 964 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 965 |
|
| 966 |
def infer(request: TextToImageRequest, pipeline: StableDiffusionXLPipeline) -> Image:
    """
    Generate one image for `request` and pass it through `max_pixel_filter`.

    Args:
        request: Supplies `prompt`, `negative_prompt`, `width`, `height` and `seed`.
        pipeline: Callable pipeline; its `device` is used for the random generator.

    Returns:
        The result of `max_pixel_filter` applied to the first generated image.
    """
    # `is not None` rather than truthiness: a seed of 0 is a valid seed and must still seed the generator.
    if request.seed is not None:
        generator = Generator(pipeline.device).manual_seed(request.seed)
    else:
        generator = None

    image_0 = pipeline(
        prompt=request.prompt,
        negative_prompt=request.negative_prompt,
        width=request.width,
        height=request.height,
        generator=generator,
        num_inference_steps=18,
    ).images[0]

    filter_image = max_pixel_filter(image_0)
    return filter_image
|
| 981 |
-
|
| 982 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from PIL.Image import Image
|
| 4 |
+
from diffusers import StableDiffusionXLPipeline, DDIMScheduler
|
| 5 |
from pipelines.models import TextToImageRequest
|
| 6 |
from torch import Generator
|
| 7 |
+
from cache_diffusion import cachify
|
| 8 |
+
from trt_pipeline.deploy import load_unet_trt
|
|
|
|
|
|
|
| 9 |
from loss import SchedulerWrapper
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
# Module-level CUDA generator with a fixed seed. `infer` below builds its own
# per-request generator, so this one is not used by the functions in this file.
generator = Generator(torch.device("cuda")).manual_seed(69)
# Prompt used for the warm-up runs in `load_pipeline`.
prompt = "make submissions great again"
# Configuration handed to `cachify.prepare`.
#   wildcard_or_filter_func: true for module names that contain none of
#       "down_blocks.2", "down_blocks.3" or "up_blocks.2".
#   select_cache_step_func: true for odd step indices that are >= 10.
# NOTE(review): how cachify interprets these two predicates is defined in
# cache_diffusion.cachify and is not visible here -- confirm before changing.
SDXL_DEFAULT_CONFIG = [
    {
        "wildcard_or_filter_func": lambda name: "down_blocks.2" not in name and"down_blocks.3" not in name and "up_blocks.2" not in name,
        "select_cache_step_func": lambda step: (step % 2 != 0) and (step >= 10),
    }]
|
| 18 |
+
def load_pipeline() -> StableDiffusionXLPipeline:
    """Build, accelerate and warm up the SDXL pipeline.

    Loads the fp16 checkpoint from the local ``models/newdream-sdxl-20``
    directory onto CUDA, attaches the TensorRT engines found in ``./engine``
    to the UNet, configures cachify, swaps in a wrapped DDIM scheduler, and
    runs five 20-step warm-up generations before returning the pipeline.

    Returns:
        The prepared ``StableDiffusionXLPipeline``.
    """
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "models/newdream-sdxl-20", torch_dtype=torch.float16, use_safetensors=True, local_files_only=True
    ).to("cuda")
    # Patch the UNet forward methods and load/warm up the TensorRT engines.
    load_unet_trt(
        pipe.unet,
        engine_path=Path("./engine"),
        batch_size=1,
    )
    cachify.prepare(pipe, SDXL_DEFAULT_CONFIG)
    cachify.enable(pipe)
    # Rebuild a DDIM scheduler from the existing scheduler config and wrap it.
    pipe.scheduler = SchedulerWrapper(DDIMScheduler.from_config(pipe.scheduler.config))
    # NOTE(review): `cached_pipe` is bound but unused; the warm-up calls `pipe`
    # directly, whereas `infer` calls the object yielded by `cachify.infer`.
    # Confirm both refer to the same pipeline.
    with cachify.infer(pipe) as cached_pipe:
        for _ in range(5):
            pipe(prompt=prompt, num_inference_steps=20)
    # Caching is switched off after warm-up; `infer` enables it again per request.
    cachify.disable(pipe)
    # NOTE(review): `prepare_loss` is provided by SchedulerWrapper, which is not
    # visible here -- presumably it finalises state gathered during warm-up.
    pipe.scheduler.prepare_loss()
    return pipe
|
| 36 |
|
| 37 |
def infer(request: TextToImageRequest, pipeline: StableDiffusionXLPipeline) -> Image:
    """Generate one image for ``request`` using the cached pipeline.

    Args:
        request: Supplies ``seed``, ``prompt``, ``negative_prompt``, ``width``
            and ``height``.
        pipeline: The pipeline returned by ``load_pipeline``.

    Returns:
        The first image produced by a 15-step run.
    """
    seed = request.seed
    # No seed means the pipeline picks its own randomness (generator=None).
    generator = None if seed is None else Generator(pipeline.device).manual_seed(seed)

    # Caching is re-armed on every request.
    cachify.prepare(pipeline, SDXL_DEFAULT_CONFIG)
    cachify.enable(pipeline)

    call_kwargs = {
        "prompt": request.prompt,
        "negative_prompt": request.negative_prompt,
        "width": request.width,
        "height": request.height,
        "generator": generator,
        "num_inference_steps": 15,
    }
    with cachify.infer(pipeline) as cached_pipe:
        image = cached_pipe(**call_kwargs).images[0]
    return image
|
src/trt_pipeline/config.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
#
|
| 4 |
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
| 5 |
+
# copy of this software and associated documentation files (the "Software"),
|
| 6 |
+
# to deal in the Software without restriction, including without limitation
|
| 7 |
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 8 |
+
# and/or sell copies of the Software, and to permit persons to whom the
|
| 9 |
+
# Software is furnished to do so, subject to the following conditions:
|
| 10 |
+
#
|
| 11 |
+
# The above copyright notice and this permission notice shall be included in
|
| 12 |
+
# all copies or substantial portions of the Software.
|
| 13 |
+
#
|
| 14 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 17 |
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 19 |
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 20 |
+
# DEALINGS IN THE SOFTWARE.
|
| 21 |
+
from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
|
| 22 |
+
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
|
| 23 |
+
|
| 24 |
+
# Per-block description shared by SD3 transformer blocks 0..22.
# NOTE: the very same dict object is referenced by all 23 keys in ONNX_CONFIG
# below, so mutating it through one key affects every one of them.
sd3_common_transformer_block_config = {
    "dummy_input": {
        "hidden_states": (2, 4096, 1536),
        "encoder_hidden_states": (2, 333, 1536),
        "temb": (2, 1536),
    },
    "output_names": ["encoder_hidden_states_out", "hidden_states_out"],
    "dynamic_axes": {
        "hidden_states": {0: "batch_size"},
        "encoder_hidden_states": {0: "batch_size"},
        "temb": {0: "steps"},
    },
}

# Maps a backbone class to {block name: block description}.
#
# Each description has three keys:
#   "dummy_input":  input name -> shape tuple. trt_pipeline.deploy multiplies
#                   the first dimension by the requested batch size when it
#                   allocates engine buffers and builds warm-up inputs.
#   "output_names": names of the block's outputs.
#   "dynamic_axes": input name -> {axis index: symbolic axis name}.
# NOTE(review): "output_names" and "dynamic_axes" look like arguments for an
# ONNX export step; that consumer is not visible here -- confirm.
#
# Block names must match the engine file stems, because deploy.load_engines
# looks descriptions up by the file stem.
ONNX_CONFIG = {
    UNet2DConditionModel: {
        # No "encoder_hidden_states" input for this block.
        "down_blocks.0": {
            "dummy_input": {
                "hidden_states": (2, 320, 128, 128),
                "temb": (2, 1280),
            },
            "output_names": ["sample", "res_samples_0", "res_samples_1", "res_samples_2"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
            },
        },
        "down_blocks.1": {
            "dummy_input": {
                "hidden_states": (2, 320, 64, 64),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample", "res_samples_0", "res_samples_1", "res_samples_2"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
            },
        },
        # Only two residual outputs for this block.
        "down_blocks.2": {
            "dummy_input": {
                "hidden_states": (2, 640, 32, 32),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample", "res_samples_0", "res_samples_1"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
            },
        },
        "mid_block": {
            "dummy_input": {
                "hidden_states": (2, 1280, 32, 32),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
            },
        },
        # Up blocks take three residual inputs in addition to hidden_states.
        "up_blocks.0": {
            "dummy_input": {
                "hidden_states": (2, 1280, 32, 32),
                "res_hidden_states_0": (2, 640, 32, 32),
                "res_hidden_states_1": (2, 1280, 32, 32),
                "res_hidden_states_2": (2, 1280, 32, 32),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
                "res_hidden_states_0": {0: "batch_size"},
                "res_hidden_states_1": {0: "batch_size"},
                "res_hidden_states_2": {0: "batch_size"},
            },
        },
        "up_blocks.1": {
            "dummy_input": {
                "hidden_states": (2, 1280, 64, 64),
                "res_hidden_states_0": (2, 320, 64, 64),
                "res_hidden_states_1": (2, 640, 64, 64),
                "res_hidden_states_2": (2, 640, 64, 64),
                "temb": (2, 1280),
                "encoder_hidden_states": (2, 77, 2048),
            },
            "output_names": ["sample"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "encoder_hidden_states": {0: "batch_size"},
                "res_hidden_states_0": {0: "batch_size"},
                "res_hidden_states_1": {0: "batch_size"},
                "res_hidden_states_2": {0: "batch_size"},
            },
        },
        # No "encoder_hidden_states" input for this block.
        "up_blocks.2": {
            "dummy_input": {
                "hidden_states": (2, 640, 128, 128),
                "res_hidden_states_0": (2, 320, 128, 128),
                "res_hidden_states_1": (2, 320, 128, 128),
                "res_hidden_states_2": (2, 320, 128, 128),
                "temb": (2, 1280),
            },
            "output_names": ["sample"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
                "res_hidden_states_0": {0: "batch_size"},
                "res_hidden_states_1": {0: "batch_size"},
                "res_hidden_states_2": {0: "batch_size"},
            },
        },
    },
    SD3Transformer2DModel: {
        # Blocks 0..22 share one description object.
        **{f"transformer_blocks.{i}": sd3_common_transformer_block_config for i in range(23)},
        # Block 23 differs only in that it lists a single output,
        # "hidden_states_out".
        "transformer_blocks.23": {
            "dummy_input": {
                "hidden_states": (2, 4096, 1536),
                "encoder_hidden_states": (2, 333, 1536),
                "temb": (2, 1536),
            },
            "output_names": ["hidden_states_out"],
            "dynamic_axes": {
                "hidden_states": {0: "batch_size"},
                "encoder_hidden_states": {0: "batch_size"},
                "temb": {0: "steps"},
            },
        },
    },
}
|
src/trt_pipeline/deploy.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
#
|
| 4 |
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
| 5 |
+
# copy of this software and associated documentation files (the "Software"),
|
| 6 |
+
# to deal in the Software without restriction, including without limitation
|
| 7 |
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 8 |
+
# and/or sell copies of the Software, and to permit persons to whom the
|
| 9 |
+
# Software is furnished to do so, subject to the following conditions:
|
| 10 |
+
#
|
| 11 |
+
# The above copyright notice and this permission notice shall be included in
|
| 12 |
+
# all copies or substantial portions of the Software.
|
| 13 |
+
#
|
| 14 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 17 |
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 19 |
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 20 |
+
# DEALINGS IN THE SOFTWARE.
|
| 21 |
+
|
| 22 |
+
import types
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
|
| 25 |
+
import tensorrt as trt
|
| 26 |
+
import torch
|
| 27 |
+
from cache_diffusion.cachify import CACHED_PIPE, get_model
|
| 28 |
+
from cuda import cudart
|
| 29 |
+
from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
|
| 30 |
+
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
|
| 31 |
+
from trt_pipeline.config import ONNX_CONFIG
|
| 32 |
+
from trt_pipeline.models.sd3 import sd3_forward
|
| 33 |
+
from trt_pipeline.models.sdxl import (
|
| 34 |
+
cachecrossattnupblock2d_forward,
|
| 35 |
+
cacheunet_forward,
|
| 36 |
+
cacheupblock2d_forward,
|
| 37 |
+
)
|
| 38 |
+
from polygraphy.backend.trt import (
|
| 39 |
+
CreateConfig,
|
| 40 |
+
Profile,
|
| 41 |
+
engine_from_network,
|
| 42 |
+
network_from_onnx_path,
|
| 43 |
+
save_engine,
|
| 44 |
+
)
|
| 45 |
+
from torch.onnx import export as onnx_export
|
| 46 |
+
|
| 47 |
+
from .utils import Engine
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def replace_new_forward(backbone):
    """Bind the TensorRT-aware forward functions onto ``backbone`` in place.

    A ``UNet2DConditionModel`` gets ``cacheunet_forward``, and each of its
    up blocks gets the cross-attention or plain variant depending on its
    ``has_cross_attention`` attribute. An ``SD3Transformer2DModel`` gets
    ``sd3_forward``. Any other class is left untouched.
    """
    model_cls = backbone.__class__

    if model_cls == UNet2DConditionModel:
        backbone.forward = types.MethodType(cacheunet_forward, backbone)
        for up_block in backbone.up_blocks:
            # A missing attribute counts the same as a false one.
            if getattr(up_block, "has_cross_attention", False):
                patched_forward = cachecrossattnupblock2d_forward
            else:
                patched_forward = cacheupblock2d_forward
            up_block.forward = types.MethodType(patched_forward, up_block)
        return

    if model_cls == SD3Transformer2DModel:
        backbone.forward = types.MethodType(sd3_forward, backbone)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def get_input_info(dummy_dict, info: str = None, batch_size: int = 1):
    """Flatten a (possibly nested) ``dummy_input`` mapping into the requested view.

    Every non-dict value is treated as a shape tuple whose first dimension is
    multiplied by ``batch_size``.

    Args:
        dummy_dict: Mapping of input name to shape tuple, or to a nested
            mapping of the same form.
        info: Which view to build:
            ``"profile_shapes"``      -> list of ``(name, shape)`` pairs
            ``"input_names"``         -> list of names
            ``"profile_shapes_dict"`` -> dict of name to shape
            ``"dummy_input"``         -> dict of name to an all-ones fp16 CUDA tensor
            Any other value (including the default ``None``) gives an empty dict.
        batch_size: Multiplier applied to the first dimension of each shape.

    Returns:
        A list or dict as described under ``info``.
    """

    def iter_leaves(mapping):
        # Depth-first walk yielding (name, batch-scaled shape) for each leaf.
        for name, shape in mapping.items():
            if isinstance(shape, dict):
                yield from iter_leaves(shape)
            else:
                yield name, (shape[0] * batch_size,) + shape[1:]

    leaves = iter_leaves(dummy_dict)

    if info == "profile_shapes":
        return list(leaves)
    if info == "input_names":
        return [name for name, _ in leaves]
    if info == "profile_shapes_dict":
        return dict(leaves)
    if info == "dummy_input":
        return {name: torch.ones(shape).half().cuda() for name, shape in leaves}

    # Unrecognised selector: still walk the mapping so a malformed leaf is
    # reported, then return the empty dict.
    for _ in leaves:
        pass
    return {}
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def get_total_device_memory(backbone):
    """Return the largest ``device_memory_size`` among the backbone's engines.

    Despite the name this is a maximum, not a sum. The result is never below
    0, and is 0 when ``backbone.engines`` is empty.
    """
    sizes = [wrapper.engine.device_memory_size for wrapper in backbone.engines.values()]
    # Seed with 0 so an empty engine dict yields 0.
    return max([0, *sizes])
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def load_engines(backbone, engine_path: Path, batch_size: int = 1):
    """Load the TensorRT engines in ``engine_path`` and attach them to ``backbone``.

    Sets ``backbone.engines`` (file stem -> ``Engine``) and
    ``backbone.cuda_stream``. Every engine is activated against one shared
    device-memory allocation sized for the largest engine, and its buffers are
    allocated from the matching ``ONNX_CONFIG`` entry.

    Args:
        backbone: Model whose class is a key of ``ONNX_CONFIG``.
        engine_path: Directory containing one serialized engine per block,
            named ``<block name>.<ext>``.
        batch_size: Multiplier applied to the first dimension of each
            configured input shape.

    Raises:
        RuntimeError: If the CUDA allocation or stream creation fails.
        KeyError: If ``backbone``'s class has no ``ONNX_CONFIG`` entry and at
            least one engine was loaded.
    """
    # Block names known for this backbone. When the class has no entry,
    # nothing is filtered and the lookup further down raises the same
    # KeyError as before.
    known_blocks = ONNX_CONFIG.get(backbone.__class__, {})

    backbone.engines = {}
    for f in engine_path.iterdir():
        if not f.is_file():
            continue
        if known_blocks and f.stem not in known_blocks:
            # Stray files (".gitkeep", editor backups, ...) are not serialized
            # engines and have no buffer description, so skip them.
            continue
        eng = Engine()
        eng.load(str(f))
        backbone.engines[f"{f.stem}"] = eng

    # cudart calls return a status code first; check it so an out-of-memory
    # condition does not surface later as an unrelated failure.
    err, shared_device_memory = cudart.cudaMalloc(get_total_device_memory(backbone))
    if err != cudart.cudaError_t.cudaSuccess:
        raise RuntimeError(f"cudaMalloc for shared TensorRT device memory failed: {err}")
    # NOTE(review): the pointer is only passed to the engines and not stored
    # on the backbone, so nothing in this module can cudaFree it later.
    for engine in backbone.engines.values():
        engine.activate(shared_device_memory)

    err, cuda_stream = cudart.cudaStreamCreate()
    if err != cudart.cudaError_t.cudaSuccess:
        raise RuntimeError(f"cudaStreamCreate failed: {err}")
    backbone.cuda_stream = cuda_stream

    for block_name, engine in backbone.engines.items():
        engine.allocate_buffers(
            shape_dict=get_input_info(
                ONNX_CONFIG[backbone.__class__][block_name]["dummy_input"],
                "profile_shapes_dict",
                batch_size,
            ),
            device=backbone.device,
            batch_size=batch_size,
        )
    # TODO: Free and clean up the origin pytorch cuda memory
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def warm_up(backbone, batch_size: int = 1):
    """Run each loaded engine once on an all-ones dummy input.

    Args:
        backbone: Model carrying ``engines`` and ``cuda_stream``.
        batch_size: Multiplier applied to the first dimension of each
            configured dummy-input shape.
    """
    print("Warming-up TensorRT engines...")
    stream = backbone.cuda_stream
    for block_name, block_engine in backbone.engines.items():
        shapes = ONNX_CONFIG[backbone.__class__][block_name]["dummy_input"]
        feed = get_input_info(shapes, "dummy_input", batch_size)
        # The output is discarded; only the execution matters here.
        block_engine(feed, stream)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def teardown(pipe):
    """Release the TensorRT engines and CUDA stream attached to ``pipe``'s backbone.

    After this call the backbone has an empty ``engines`` dict and no
    ``cuda_stream`` attribute.

    Args:
        pipe: Pipeline whose backbone is resolved with ``get_model``.
    """
    backbone = get_model(pipe)
    # `del engine` inside a `for` loop only unbinds the loop variable; the
    # dict would keep every engine alive. Clearing the dict drops the
    # references that actually own them.
    backbone.engines.clear()

    cudart.cudaStreamDestroy(backbone.cuda_stream)
    del backbone.cuda_stream
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def load_unet_trt(unet, engine_path: Path, batch_size: int = 1):
    """Switch ``unet`` over to TensorRT execution.

    Creates ``engine_path`` if it does not exist, patches the forward methods,
    loads and warms up the engines, and finally sets ``unet.use_trt_infer``.

    Args:
        unet: Backbone model to patch in place.
        engine_path: Directory holding the serialized engines.
        batch_size: Forwarded to ``load_engines`` and ``warm_up``.
    """
    engine_path.mkdir(parents=True, exist_ok=True)
    replace_new_forward(unet)
    load_engines(unet, engine_path, batch_size)
    warm_up(unet, batch_size)
    # Set last, after the engines have been loaded and warmed up.
    unet.use_trt_infer = True
|
src/trt_pipeline/models/sd3.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
#
|
| 4 |
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
| 5 |
+
# copy of this software and associated documentation files (the "Software"),
|
| 6 |
+
# to deal in the Software without restriction, including without limitation
|
| 7 |
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 8 |
+
# and/or sell copies of the Software, and to permit persons to whom the
|
| 9 |
+
# Software is furnished to do so, subject to the following conditions:
|
| 10 |
+
#
|
| 11 |
+
# The above copyright notice and this permission notice shall be included in
|
| 12 |
+
# all copies or substantial portions of the Software.
|
| 13 |
+
#
|
| 14 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 17 |
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 19 |
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 20 |
+
# DEALINGS IN THE SOFTWARE.
|
| 21 |
+
|
| 22 |
+
from typing import Any, Dict, List, Optional, Union
|
| 23 |
+
|
| 24 |
+
import torch
|
| 25 |
+
from diffusers.models.modeling_outputs import Transformer2DModelOutput
|
| 26 |
+
from diffusers.utils import (
|
| 27 |
+
USE_PEFT_BACKEND,
|
| 28 |
+
is_torch_version,
|
| 29 |
+
scale_lora_layers,
|
| 30 |
+
unscale_lora_layers,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def sd3_forward(
    self,
    hidden_states: torch.FloatTensor,
    encoder_hidden_states: torch.FloatTensor = None,
    pooled_projections: torch.FloatTensor = None,
    timestep: torch.LongTensor = None,
    block_controlnet_hidden_states: List = None,
    joint_attention_kwargs: Optional[Dict[str, Any]] = None,
    return_dict: bool = True,
) -> Union[torch.FloatTensor, Transformer2DModelOutput]:
    """
    The [`SD3Transformer2DModel`] forward method.

    When the model has a truthy `use_trt_infer` attribute (and is not in the gradient-checkpointing training path),
    each transformer block is executed through `self.engines["transformer_blocks.<index>"]` on `self.cuda_stream`
    instead of calling the PyTorch block.

    Args:
        hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`):
            Input `hidden_states`.
        encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`):
            Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
        pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected
            from the embeddings of input conditions.
        timestep ( `torch.LongTensor`):
            Used to indicate denoising step.
        block_controlnet_hidden_states: (`list` of `torch.Tensor`):
            A list of tensors that if specified are added to the residuals of transformer blocks.
        joint_attention_kwargs (`dict`, *optional*):
            A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
            `self.processor` in
            [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
        return_dict (`bool`, *optional*, defaults to `True`):
            Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
            tuple.

    Returns:
        If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
        `tuple` where the first element is the sample tensor.
    """
    # Work on a copy so the caller's dict is not mutated by the `pop`.
    if joint_attention_kwargs is not None:
        joint_attention_kwargs = joint_attention_kwargs.copy()
        lora_scale = joint_attention_kwargs.pop("scale", 1.0)
    else:
        lora_scale = 1.0

    if USE_PEFT_BACKEND:
        # weight the lora layers by setting `lora_scale` for each PEFT layer
        scale_lora_layers(self, lora_scale)

    # Remember the spatial size before patch embedding; needed to unpatchify.
    height, width = hidden_states.shape[-2:]

    hidden_states = self.pos_embed(hidden_states)  # takes care of adding positional embeddings too.
    temb = self.time_text_embed(timestep, pooled_projections)
    encoder_hidden_states = self.context_embedder(encoder_hidden_states)

    for index_block, block in enumerate(self.transformer_blocks):
        if self.training and self.gradient_checkpointing:

            def create_custom_forward(module, return_dict=None):
                def custom_forward(*inputs):
                    if return_dict is not None:
                        return module(*inputs, return_dict=return_dict)
                    else:
                        return module(*inputs)

                return custom_forward

            ckpt_kwargs: Dict[str, Any] = (
                {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
            )
            encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
                create_custom_forward(block),
                hidden_states,
                encoder_hidden_states,
                temb,
                **ckpt_kwargs,
            )

        else:
            if hasattr(self, "use_trt_infer") and self.use_trt_infer:
                # TensorRT path: feed the three block inputs by name and read
                # the outputs back by name.
                feed_dict = {
                    "hidden_states": hidden_states,
                    "encoder_hidden_states": encoder_hidden_states,
                    "temb": temb,
                }
                _results = self.engines[f"transformer_blocks.{index_block}"](
                    feed_dict, self.cuda_stream
                )
                # NOTE(review): 23 is hard-coded as the index of the block
                # whose engine has no "encoder_hidden_states_out" output; it
                # matches the "transformer_blocks.23" entry in ONNX_CONFIG.
                # Presumably this is the last block of a 24-block model --
                # confirm before using with a different block count.
                if index_block != 23:
                    encoder_hidden_states = _results["encoder_hidden_states_out"]
                hidden_states = _results["hidden_states_out"]
            else:
                encoder_hidden_states, hidden_states = block(
                    hidden_states=hidden_states,
                    encoder_hidden_states=encoder_hidden_states,
                    temb=temb,
                )

        # controlnet residual
        if block_controlnet_hidden_states is not None and block.context_pre_only is False:
            # Spread the controlnet tensors evenly over the transformer blocks.
            interval_control = len(self.transformer_blocks) // len(block_controlnet_hidden_states)
            hidden_states = (
                hidden_states + block_controlnet_hidden_states[index_block // interval_control]
            )

    hidden_states = self.norm_out(hidden_states, temb)
    hidden_states = self.proj_out(hidden_states)

    # unpatchify
    patch_size = self.config.patch_size
    height = height // patch_size
    width = width // patch_size

    hidden_states = hidden_states.reshape(
        shape=(hidden_states.shape[0], height, width, patch_size, patch_size, self.out_channels)
    )
    # Move channels first and interleave each patch back into the spatial grid.
    hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states)
    output = hidden_states.reshape(
        shape=(hidden_states.shape[0], self.out_channels, height * patch_size, width * patch_size)
    )

    if USE_PEFT_BACKEND:
        # remove `lora_scale` from each PEFT layer
        unscale_lora_layers(self, lora_scale)

    if not return_dict:
        return (output,)

    return Transformer2DModelOutput(sample=output)
|
src/trt_pipeline/models/sdxl.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Adapted from
|
| 2 |
+
# https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_condition.py#L1039-L1312
|
| 3 |
+
# https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_blocks.py#L2482-L2564
|
| 4 |
+
# https://github.com/huggingface/diffusers/blob/73acebb8cfbd1d2954cabe1af4185f9994e61917/src/diffusers/models/unets/unet_2d_blocks.py#L2617-L2679
|
| 5 |
+
|
| 6 |
+
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
| 7 |
+
#
|
| 8 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 9 |
+
# you may not use this file except in compliance with the License.
|
| 10 |
+
# You may obtain a copy of the License at
|
| 11 |
+
#
|
| 12 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 13 |
+
#
|
| 14 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 15 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 16 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 17 |
+
# See the License for the specific language governing permissions and
|
| 18 |
+
# limitations under the License.
|
| 19 |
+
#
|
| 20 |
+
# Not a contribution
|
| 21 |
+
# Changes made by NVIDIA CORPORATION & AFFILIATES or otherwise documented as
|
| 22 |
+
# NVIDIA-proprietary are not a contribution and subject to the following terms and conditions:
|
| 23 |
+
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 24 |
+
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
|
| 25 |
+
#
|
| 26 |
+
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
| 27 |
+
# property and proprietary rights in and to this material, related
|
| 28 |
+
# documentation and any modifications thereto. Any use, reproduction,
|
| 29 |
+
# disclosure or distribution of this material and related documentation
|
| 30 |
+
# without an express license agreement from NVIDIA CORPORATION or
|
| 31 |
+
# its affiliates is strictly prohibited.
|
| 32 |
+
|
| 33 |
+
from typing import Any, Dict, Optional, Tuple, Union
|
| 34 |
+
|
| 35 |
+
import torch
|
| 36 |
+
from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def cachecrossattnupblock2d_forward(
    self,
    hidden_states: torch.FloatTensor,
    res_hidden_states_0: torch.FloatTensor,
    res_hidden_states_1: torch.FloatTensor,
    res_hidden_states_2: torch.FloatTensor,
    temb: Optional[torch.FloatTensor] = None,
    encoder_hidden_states: Optional[torch.FloatTensor] = None,
    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
    upsample_size: Optional[int] = None,
    attention_mask: Optional[torch.FloatTensor] = None,
    encoder_attention_mask: Optional[torch.FloatTensor] = None,
) -> torch.FloatTensor:
    """Run a cross-attention up block with the skip connections passed as separate arguments.

    The three residuals are taken as individual parameters instead of one
    tuple. They are consumed last-first: ``res_hidden_states_2`` feeds the
    first resnet/attention pair and ``res_hidden_states_0`` the third.

    Args:
        hidden_states: Activations entering the block.
        res_hidden_states_0: Skip connection used by the third pair.
        res_hidden_states_1: Skip connection used by the second pair.
        res_hidden_states_2: Skip connection used by the first pair.
        temb: Embedding passed as the second positional argument to each resnet.
        encoder_hidden_states: Forwarded to every attention module.
        cross_attention_kwargs: Forwarded to every attention module.
        upsample_size: Passed as the second positional argument to each upsampler.
        attention_mask: Forwarded to every attention module.
        encoder_attention_mask: Forwarded to every attention module.

    Returns:
        The block output after all resnet/attention pairs and, when
        ``self.upsamplers`` is not ``None``, all upsamplers.
    """
    # A list lets us pop the most recent skip connection on each iteration.
    pending_skips = [res_hidden_states_0, res_hidden_states_1, res_hidden_states_2]

    for resnet, attn in zip(self.resnets, self.attentions):
        skip = pending_skips.pop()

        # Skip connection is concatenated along dim 1 before the resnet.
        merged = torch.cat([hidden_states, skip], dim=1)
        resnet_out = resnet(merged, temb)

        attn_out = attn(
            resnet_out,
            encoder_hidden_states=encoder_hidden_states,
            cross_attention_kwargs=cross_attention_kwargs,
            attention_mask=attention_mask,
            encoder_attention_mask=encoder_attention_mask,
            return_dict=False,
        )
        # With return_dict=False the attention module returns a tuple; keep the first entry.
        hidden_states = attn_out[0]

    if self.upsamplers is None:
        return hidden_states

    for upsampler in self.upsamplers:
        hidden_states = upsampler(hidden_states, upsample_size)

    return hidden_states
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def cacheupblock2d_forward(
    self,
    hidden_states: torch.FloatTensor,
    res_hidden_states_0: torch.FloatTensor,
    res_hidden_states_1: torch.FloatTensor,
    res_hidden_states_2: torch.FloatTensor,
    temb: Optional[torch.FloatTensor] = None,
    upsample_size: Optional[int] = None,
) -> torch.FloatTensor:
    """Run a plain (attention-free) up block with the skip connections passed separately.

    The three residuals are taken as individual parameters instead of one
    tuple. They are consumed last-first: ``res_hidden_states_2`` feeds the
    first resnet and ``res_hidden_states_0`` the third.

    Args:
        hidden_states: Activations entering the block.
        res_hidden_states_0: Skip connection used by the third resnet.
        res_hidden_states_1: Skip connection used by the second resnet.
        res_hidden_states_2: Skip connection used by the first resnet.
        temb: Embedding passed as the second positional argument to each resnet.
        upsample_size: Passed as the second positional argument to each upsampler.

    Returns:
        The block output after all resnets and, when ``self.upsamplers`` is
        not ``None``, all upsamplers.
    """
    # A list lets us pop the most recent skip connection on each iteration.
    pending_skips = [res_hidden_states_0, res_hidden_states_1, res_hidden_states_2]

    for resnet in self.resnets:
        skip = pending_skips.pop()
        # Skip connection is concatenated along dim 1 before the resnet.
        merged = torch.cat([hidden_states, skip], dim=1)
        hidden_states = resnet(merged, temb)

    if self.upsamplers is None:
        return hidden_states

    for upsampler in self.upsamplers:
        hidden_states = upsampler(hidden_states, upsample_size)

    return hidden_states
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def cacheunet_forward(
    self,
    sample: torch.FloatTensor,
    timestep: Union[torch.Tensor, float, int],
    encoder_hidden_states: torch.Tensor,
    class_labels: Optional[torch.Tensor] = None,
    timestep_cond: Optional[torch.Tensor] = None,
    attention_mask: Optional[torch.Tensor] = None,
    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
    added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
    down_block_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
    mid_block_additional_residual: Optional[torch.Tensor] = None,
    down_intrablock_additional_residuals: Optional[Tuple[torch.Tensor]] = None,
    encoder_attention_mask: Optional[torch.Tensor] = None,
    return_dict: bool = True,
) -> Union[UNet2DConditionOutput, Tuple]:
    """UNet forward pass that can dispatch each down/mid/up block to a TensorRT engine.

    When ``self.use_trt_infer`` exists and is truthy, every block is executed
    through ``self.engines[<block name>]`` on ``self.cuda_stream``; otherwise
    the regular torch blocks are called. Up blocks are always called with
    three separate residual arguments (``res_hidden_states_0..2``).

    Args:
        sample: Input passed through ``self.conv_in``.
        timestep: Timestep handed to ``self.get_time_embed``.
        encoder_hidden_states: Conditioning forwarded to cross-attention blocks.
        class_labels: Accepted for signature compatibility; not read here.
        timestep_cond: Second argument of ``self.time_embedding``.
        attention_mask: Forwarded to torch blocks only (not to engines).
        cross_attention_kwargs: Forwarded to torch blocks only (not to engines).
        added_cond_kwargs: Passed to ``self.get_aug_embed`` and
            ``self.process_encoder_hidden_states``.
        down_block_additional_residuals: Accepted for signature compatibility; not read here.
        mid_block_additional_residual: Accepted for signature compatibility; not read here.
        down_intrablock_additional_residuals: Accepted for signature compatibility; not read here.
        encoder_attention_mask: Forwarded to torch blocks only (not to engines).
        return_dict: When false, return a one-element tuple instead of
            ``UNet2DConditionOutput``.

    Returns:
        ``UNet2DConditionOutput(sample=...)`` or ``(sample,)``. If
        ``self._export_precess_onnx`` exists and is truthy, the function
        returns early with ``(sample, encoder_hidden_states, emb)`` right
        after ``conv_in``.
    """
    # Probe the optional flag once instead of at every dispatch point.
    use_trt = getattr(self, "use_trt_infer", False)

    # 1. time
    t_emb = self.get_time_embed(sample=sample, timestep=timestep)
    emb = self.time_embedding(t_emb, timestep_cond)

    aug_emb = self.get_aug_embed(
        emb=emb,
        encoder_hidden_states=encoder_hidden_states,
        added_cond_kwargs=added_cond_kwargs,
    )
    emb = emb + aug_emb if aug_emb is not None else emb

    encoder_hidden_states = self.process_encoder_hidden_states(
        encoder_hidden_states=encoder_hidden_states, added_cond_kwargs=added_cond_kwargs
    )

    # 2. pre-process
    sample = self.conv_in(sample)

    # NOTE: the attribute name (including its spelling) is set by external
    # code, so it must be kept exactly as is.
    if getattr(self, "_export_precess_onnx", False):
        return (
            sample,
            encoder_hidden_states,
            emb,
        )

    # 3. down
    down_block_res_samples = (sample,)
    for i, downsample_block in enumerate(self.down_blocks):
        has_cross_attention = getattr(downsample_block, "has_cross_attention", False)

        if use_trt:
            feed_dict = {"hidden_states": sample, "temb": emb}
            if has_cross_attention:
                feed_dict["encoder_hidden_states"] = encoder_hidden_states
            down_results = self.engines[f"down_blocks.{i}"](feed_dict, self.cuda_stream)
            sample = down_results["sample"]
            down_block_res_samples += (
                down_results["res_samples_0"],
                down_results["res_samples_1"],
            )
            # Not every down-block engine exposes a third residual output.
            if "res_samples_2" in down_results:
                down_block_res_samples += (down_results["res_samples_2"],)
        elif has_cross_attention:
            sample, res_samples = downsample_block(
                hidden_states=sample,
                temb=emb,
                encoder_hidden_states=encoder_hidden_states,
                attention_mask=attention_mask,
                cross_attention_kwargs=cross_attention_kwargs,
                encoder_attention_mask=encoder_attention_mask,
            )
            down_block_res_samples += res_samples
        else:
            sample, res_samples = downsample_block(hidden_states=sample, temb=emb)
            down_block_res_samples += res_samples

    # 4. mid
    if use_trt:
        feed_dict = {
            "hidden_states": sample,
            "temb": emb,
            "encoder_hidden_states": encoder_hidden_states,
        }
        mid_results = self.engines["mid_block"](feed_dict, self.cuda_stream)
        sample = mid_results["sample"]
    else:
        sample = self.mid_block(
            sample,
            emb,
            encoder_hidden_states=encoder_hidden_states,
            attention_mask=attention_mask,
            cross_attention_kwargs=cross_attention_kwargs,
            encoder_attention_mask=encoder_attention_mask,
        )

    # 5. up
    for i, upsample_block in enumerate(self.up_blocks):
        # Each up block consumes as many residuals as it has resnets, taken
        # from the end of the accumulated tuple.
        num_resnets = len(upsample_block.resnets)
        res_samples = down_block_res_samples[-num_resnets:]
        down_block_res_samples = down_block_res_samples[:-num_resnets]

        has_cross_attention = getattr(upsample_block, "has_cross_attention", False)

        if use_trt:
            feed_dict = {
                "hidden_states": sample,
                "res_hidden_states_0": res_samples[0],
                "res_hidden_states_1": res_samples[1],
                "res_hidden_states_2": res_samples[2],
                "temb": emb,
            }
            if has_cross_attention:
                feed_dict["encoder_hidden_states"] = encoder_hidden_states
            up_results = self.engines[f"up_blocks.{i}"](feed_dict, self.cuda_stream)
            sample = up_results["sample"]
        elif has_cross_attention:
            sample = upsample_block(
                hidden_states=sample,
                temb=emb,
                res_hidden_states_0=res_samples[0],
                res_hidden_states_1=res_samples[1],
                res_hidden_states_2=res_samples[2],
                encoder_hidden_states=encoder_hidden_states,
                cross_attention_kwargs=cross_attention_kwargs,
                attention_mask=attention_mask,
                encoder_attention_mask=encoder_attention_mask,
            )
        else:
            sample = upsample_block(
                hidden_states=sample,
                temb=emb,
                res_hidden_states_0=res_samples[0],
                res_hidden_states_1=res_samples[1],
                res_hidden_states_2=res_samples[2],
            )

    # 6. post-process
    if self.conv_norm_out:
        sample = self.conv_norm_out(sample)
        sample = self.conv_act(sample)
    sample = self.conv_out(sample)

    if not return_dict:
        return (sample,)

    return UNet2DConditionOutput(sample=sample)
|
src/trt_pipeline/utils.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
#
|
| 4 |
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
| 5 |
+
# copy of this software and associated documentation files (the "Software"),
|
| 6 |
+
# to deal in the Software without restriction, including without limitation
|
| 7 |
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
| 8 |
+
# and/or sell copies of the Software, and to permit persons to whom the
|
| 9 |
+
# Software is furnished to do so, subject to the following conditions:
|
| 10 |
+
#
|
| 11 |
+
# The above copyright notice and this permission notice shall be included in
|
| 12 |
+
# all copies or substantial portions of the Software.
|
| 13 |
+
#
|
| 14 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 15 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 16 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
| 17 |
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 18 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
| 19 |
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
| 20 |
+
# DEALINGS IN THE SOFTWARE.
|
| 21 |
+
|
| 22 |
+
from collections import OrderedDict
|
| 23 |
+
|
| 24 |
+
import numpy as np
|
| 25 |
+
import tensorrt as trt
|
| 26 |
+
import torch
|
| 27 |
+
from cuda import cudart
|
| 28 |
+
from polygraphy.backend.common import bytes_from_path
|
| 29 |
+
from polygraphy.backend.trt import engine_from_bytes
|
| 30 |
+
|
| 31 |
+
# Maps a numpy scalar type to the torch dtype of the same name. Both
# libraries expose these dtypes under identical attribute names, so the
# table is derived from one ordered list of names.
numpy_to_torch_dtype_dict = {
    getattr(np, dtype_name): getattr(torch, dtype_name)
    for dtype_name in (
        "uint8",
        "int8",
        "int16",
        "int32",
        "int64",
        "float16",
        "float32",
        "float64",
        "complex64",
        "complex128",
    )
}
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class Engine:
    """Runtime wrapper around one serialized TensorRT engine.

    Typical call order, as far as this class shows: ``load`` -> ``activate``
    -> ``allocate_buffers`` -> ``__call__``. The instance owns one torch
    tensor per engine I/O tensor (``self.tensors``); inputs are copied into
    those tensors and outputs are read from them.
    """

    def __init__(
        self,
    ):
        self.engine = None
        self.context = None
        self.buffers = OrderedDict()
        # name -> torch tensor bound to the engine I/O tensor of that name
        self.tensors = OrderedDict()
        self.graph = None  # captured cuda graph (set on first graph run)
        self.cuda_graph_instance = None  # cuda graph
        self.has_cross_attention = False

    def __del__(self):
        # Drop references in the original order. Attributes may be missing
        # if the instance was only partially constructed, so never raise here.
        for attr in ("engine", "context", "buffers", "tensors"):
            self.__dict__.pop(attr, None)

    def load(self, engine_path):
        """Deserialize the engine stored at ``engine_path``."""
        self.engine = engine_from_bytes(bytes_from_path(engine_path))

    def activate(self, reuse_device_memory=None):
        """Create the execution context.

        Args:
            reuse_device_memory: When truthy, the context is created without
                its own device memory and this value is assigned to
                ``context.device_memory``.
        """
        if reuse_device_memory:
            self.context = self.engine.create_execution_context_without_device_memory()  # type: ignore
            self.context.device_memory = reuse_device_memory
        else:
            self.context = self.engine.create_execution_context()  # type: ignore

    def allocate_buffers(self, shape_dict=None, device="cuda", batch_size=1):
        """Allocate one torch tensor per engine I/O tensor.

        Args:
            shape_dict: Optional mapping of tensor name to shape. Names not
                present fall back to the engine's own shape with the first
                dimension replaced by ``batch_size * 2``.
            device: Device on which the tensors are allocated.
            batch_size: Used only for tensors missing from ``shape_dict``.
        """
        for binding in range(self.engine.num_io_tensors):  # type: ignore
            name = self.engine.get_tensor_name(binding)  # type: ignore
            if shape_dict and name in shape_dict:
                shape = shape_dict[name]
            else:
                shape = self.engine.get_tensor_shape(name)  # type: ignore
                # NOTE(review): the factor 2 presumably accounts for
                # classifier-free guidance batches - confirm with callers.
                shape = (batch_size * 2,) + shape[1:]
            dtype = trt.nptype(self.engine.get_tensor_dtype(name))  # type: ignore
            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:  # type: ignore
                self.context.set_input_shape(name, shape)  # type: ignore
            # Allocate directly on the target device instead of creating a
            # host tensor and copying it over.
            self.tensors[name] = torch.empty(
                tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype], device=device
            )

    def __call__(self, feed_dict, stream, use_cuda_graph=False):
        """Run inference.

        Args:
            feed_dict: Mapping of input tensor name to a tensor that is copied
                into the pre-allocated buffer of the same name.
            stream: CUDA stream handle passed to TensorRT / cudart.
            use_cuda_graph: When true, the first call runs eagerly and then
                captures a CUDA graph; later calls replay that graph.

        Returns:
            ``self.tensors`` - the same buffer dict on every call, so outputs
            are overwritten by the next invocation.

        Raises:
            ValueError: If TensorRT reports a failed enqueue.
            RuntimeError: If a cudart call fails (raised by ``cuassert``).
        """
        for name, buf in feed_dict.items():
            self.tensors[name].copy_(buf)

        for name, tensor in self.tensors.items():
            self.context.set_tensor_address(name, tensor.data_ptr())  # type: ignore

        if use_cuda_graph and self.cuda_graph_instance is not None:
            # Replay the previously captured graph.
            cuassert(cudart.cudaGraphLaunch(self.cuda_graph_instance, stream))
            cuassert(cudart.cudaStreamSynchronize(stream))
            return self.tensors

        # Eager run; with use_cuda_graph this is the inference that has to
        # happen before CUDA graph capture.
        if not self.context.execute_async_v3(stream):  # type: ignore
            raise ValueError("ERROR: inference failed.")

        if use_cuda_graph:
            # capture cuda graph
            cuassert(
                cudart.cudaStreamBeginCapture(
                    stream, cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal
                )
            )
            captured_ok = self.context.execute_async_v3(stream)  # type: ignore
            # Always end the capture so the stream leaves capture mode, then
            # report a failed enqueue instead of instantiating a broken graph.
            self.graph = cuassert(cudart.cudaStreamEndCapture(stream))
            if not captured_ok:
                raise ValueError("ERROR: inference failed during CUDA graph capture.")
            self.cuda_graph_instance = cuassert(cudart.cudaGraphInstantiate(self.graph, 0))

        return self.tensors
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def cuassert(cuda_ret):
    """Check the status of a cudart call and unwrap its result.

    Args:
        cuda_ret: Sequence returned by a ``cudart`` function; its first item
            is the error code, an optional second item is the payload.

    Returns:
        The second item of ``cuda_ret`` when present, otherwise ``None``.

    Raises:
        RuntimeError: If the error code is not ``cudaSuccess``.
    """
    status, *payload = cuda_ret
    if status != cudart.cudaError_t.cudaSuccess:
        raise RuntimeError(
            f"CUDA ERROR: {status}, error code reference: https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaError_t"
        )
    return payload[0] if payload else None
|
uv.lock
CHANGED
|
@@ -34,19 +34,6 @@ version = "4.9.3"
|
|
| 34 |
source = { registry = "https://pypi.org/simple" }
|
| 35 |
sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034 }
|
| 36 |
|
| 37 |
-
[[package]]
|
| 38 |
-
name = "bitsandbytes"
|
| 39 |
-
version = "0.44.1"
|
| 40 |
-
source = { registry = "https://pypi.org/simple" }
|
| 41 |
-
dependencies = [
|
| 42 |
-
{ name = "numpy" },
|
| 43 |
-
{ name = "torch" },
|
| 44 |
-
]
|
| 45 |
-
wheels = [
|
| 46 |
-
{ url = "https://files.pythonhosted.org/packages/e4/e6/ccb84da7ffaf208a71c2c3c8e1120b34759df640db959660be9a98505eb4/bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:b2f24c6cbf11fc8c5d69b3dcecee9f7011451ec59d6ac833e873c9f105259668", size = 122419627 },
|
| 47 |
-
{ url = "https://files.pythonhosted.org/packages/5f/f5/11bddebb5addc0a005b0c1cecc6e4c6e4055ad7b860bdcbf6374e12a51f5/bitsandbytes-0.44.1-py3-none-win_amd64.whl", hash = "sha256:8e68e12aa25d2cf9a1730ad72890a5d1a19daa23f459a6a4679331f353d58cb4", size = 121451331 },
|
| 48 |
-
]
|
| 49 |
-
|
| 50 |
[[package]]
|
| 51 |
name = "certifi"
|
| 52 |
version = "2024.8.30"
|
|
@@ -89,9 +76,19 @@ wheels = [
|
|
| 89 |
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
|
| 90 |
]
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
[[package]]
|
| 93 |
name = "diffusers"
|
| 94 |
-
version = "0.
|
| 95 |
source = { registry = "https://pypi.org/simple" }
|
| 96 |
dependencies = [
|
| 97 |
{ name = "filelock" },
|
|
@@ -103,9 +100,9 @@ dependencies = [
|
|
| 103 |
{ name = "requests" },
|
| 104 |
{ name = "safetensors" },
|
| 105 |
]
|
| 106 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 107 |
wheels = [
|
| 108 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 109 |
]
|
| 110 |
|
| 111 |
[[package]]
|
|
@@ -114,51 +111,43 @@ version = "6"
|
|
| 114 |
source = { editable = "." }
|
| 115 |
dependencies = [
|
| 116 |
{ name = "accelerate" },
|
| 117 |
-
{ name = "
|
| 118 |
{ name = "diffusers" },
|
| 119 |
{ name = "edge-maxxing-pipelines" },
|
| 120 |
-
{ name = "huggingface-hub" },
|
| 121 |
-
{ name = "numpy" },
|
| 122 |
{ name = "omegaconf" },
|
| 123 |
-
{ name = "
|
| 124 |
-
{ name = "
|
| 125 |
-
{ name = "oneflow" },
|
| 126 |
{ name = "setuptools" },
|
| 127 |
-
{ name = "
|
| 128 |
-
{ name = "
|
|
|
|
| 129 |
{ name = "torch" },
|
| 130 |
-
{ name = "torchvision" },
|
| 131 |
{ name = "transformers" },
|
| 132 |
-
{ name = "
|
| 133 |
-
{ name = "xformers" },
|
| 134 |
]
|
| 135 |
|
| 136 |
[package.metadata]
|
| 137 |
requires-dist = [
|
| 138 |
{ name = "accelerate", specifier = "==0.31.0" },
|
| 139 |
-
{ name = "
|
| 140 |
-
{ name = "diffusers", specifier = "==0.
|
| 141 |
-
{ name = "edge-maxxing-pipelines", git = "https://github.com/womboai/edge-maxxing?subdirectory=pipelines" },
|
| 142 |
-
{ name = "huggingface-hub", specifier = "==0.25.2" },
|
| 143 |
-
{ name = "numpy", specifier = "==1.26.4" },
|
| 144 |
{ name = "omegaconf", specifier = "==2.3.0" },
|
| 145 |
-
{ name = "
|
| 146 |
-
{ name = "
|
| 147 |
-
{ name = "oneflow", url = "https://github.com/siliconflow/oneflow_releases/releases/download/community_cu118/oneflow-0.9.1.dev20240802%2Bcu118-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" },
|
| 148 |
{ name = "setuptools", specifier = ">=75.2.0" },
|
| 149 |
-
{ name = "
|
| 150 |
-
{ name = "
|
| 151 |
-
{ name = "
|
| 152 |
-
{ name = "
|
| 153 |
{ name = "transformers", specifier = "==4.41.2" },
|
| 154 |
-
{ name = "
|
| 155 |
-
{ name = "xformers", specifier = "==0.0.25.post1" },
|
| 156 |
]
|
| 157 |
|
| 158 |
[[package]]
|
| 159 |
name = "edge-maxxing-pipelines"
|
| 160 |
version = "1.0.0"
|
| 161 |
-
source = { git = "https://github.com/womboai/edge-maxxing?subdirectory=pipelines#
|
| 162 |
dependencies = [
|
| 163 |
{ name = "pydantic" },
|
| 164 |
]
|
|
@@ -183,7 +172,7 @@ wheels = [
|
|
| 183 |
|
| 184 |
[[package]]
|
| 185 |
name = "huggingface-hub"
|
| 186 |
-
version = "0.
|
| 187 |
source = { registry = "https://pypi.org/simple" }
|
| 188 |
dependencies = [
|
| 189 |
{ name = "filelock" },
|
|
@@ -194,9 +183,9 @@ dependencies = [
|
|
| 194 |
{ name = "tqdm" },
|
| 195 |
{ name = "typing-extensions" },
|
| 196 |
]
|
| 197 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 198 |
wheels = [
|
| 199 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 200 |
]
|
| 201 |
|
| 202 |
[[package]]
|
|
@@ -232,18 +221,6 @@ wheels = [
|
|
| 232 |
{ url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
|
| 233 |
]
|
| 234 |
|
| 235 |
-
[[package]]
|
| 236 |
-
name = "markdown-it-py"
|
| 237 |
-
version = "3.0.0"
|
| 238 |
-
source = { registry = "https://pypi.org/simple" }
|
| 239 |
-
dependencies = [
|
| 240 |
-
{ name = "mdurl" },
|
| 241 |
-
]
|
| 242 |
-
sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 }
|
| 243 |
-
wheels = [
|
| 244 |
-
{ url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 },
|
| 245 |
-
]
|
| 246 |
-
|
| 247 |
[[package]]
|
| 248 |
name = "markupsafe"
|
| 249 |
version = "3.0.2"
|
|
@@ -262,15 +239,6 @@ wheels = [
|
|
| 262 |
{ url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514 },
|
| 263 |
]
|
| 264 |
|
| 265 |
-
[[package]]
|
| 266 |
-
name = "mdurl"
|
| 267 |
-
version = "0.1.2"
|
| 268 |
-
source = { registry = "https://pypi.org/simple" }
|
| 269 |
-
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 }
|
| 270 |
-
wheels = [
|
| 271 |
-
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
|
| 272 |
-
]
|
| 273 |
-
|
| 274 |
[[package]]
|
| 275 |
name = "mpmath"
|
| 276 |
version = "1.3.0"
|
|
@@ -291,29 +259,24 @@ wheels = [
|
|
| 291 |
|
| 292 |
[[package]]
|
| 293 |
name = "numpy"
|
| 294 |
-
version = "1.
|
| 295 |
-
source = { registry = "https://pypi.org/simple" }
|
| 296 |
-
sdist = { url = "https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129 }
|
| 297 |
-
wheels = [
|
| 298 |
-
{ url = "https://files.pythonhosted.org/packages/a7/94/ace0fdea5241a27d13543ee117cbc65868e82213fb31a8eb7fe9ff23f313/numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0", size = 20631468 },
|
| 299 |
-
{ url = "https://files.pythonhosted.org/packages/20/f7/b24208eba89f9d1b58c1668bc6c8c4fd472b20c45573cb767f59d49fb0f6/numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a", size = 13966411 },
|
| 300 |
-
{ url = "https://files.pythonhosted.org/packages/fc/a5/4beee6488160798683eed5bdb7eead455892c3b4e1f78d79d8d3f3b084ac/numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4", size = 14219016 },
|
| 301 |
-
{ url = "https://files.pythonhosted.org/packages/4b/d7/ecf66c1cd12dc28b4040b15ab4d17b773b87fa9d29ca16125de01adb36cd/numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f", size = 18240889 },
|
| 302 |
-
{ url = "https://files.pythonhosted.org/packages/24/03/6f229fe3187546435c4f6f89f6d26c129d4f5bed40552899fcf1f0bf9e50/numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a", size = 13876746 },
|
| 303 |
-
{ url = "https://files.pythonhosted.org/packages/39/fe/39ada9b094f01f5a35486577c848fe274e374bbf8d8f472e1423a0bbd26d/numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2", size = 18078620 },
|
| 304 |
-
{ url = "https://files.pythonhosted.org/packages/d5/ef/6ad11d51197aad206a9ad2286dc1aac6a378059e06e8cf22cd08ed4f20dc/numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07", size = 5972659 },
|
| 305 |
-
{ url = "https://files.pythonhosted.org/packages/19/77/538f202862b9183f54108557bfda67e17603fc560c384559e769321c9d92/numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5", size = 15808905 },
|
| 306 |
-
]
|
| 307 |
-
|
| 308 |
-
[[package]]
|
| 309 |
-
name = "nvidia-cublas-cu11"
|
| 310 |
-
version = "11.11.3.6"
|
| 311 |
source = { registry = "https://pypi.org/simple" }
|
|
|
|
| 312 |
wheels = [
|
| 313 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 314 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 315 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 316 |
-
{ url = "https://files.pythonhosted.org/packages/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
]
|
| 318 |
|
| 319 |
[[package]]
|
|
@@ -346,40 +309,18 @@ version = "12.1.105"
|
|
| 346 |
source = { registry = "https://pypi.org/simple" }
|
| 347 |
wheels = [
|
| 348 |
{ url = "https://files.pythonhosted.org/packages/eb/d5/c68b1d2cdfcc59e72e8a5949a37ddb22ae6cade80cd4a57a84d4c8b55472/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40", size = 823596 },
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
[[package]]
|
| 352 |
-
name = "nvidia-cudnn-cu11"
|
| 353 |
-
version = "9.5.0.50"
|
| 354 |
-
source = { registry = "https://pypi.org/simple" }
|
| 355 |
-
dependencies = [
|
| 356 |
-
{ name = "nvidia-cublas-cu11" },
|
| 357 |
-
]
|
| 358 |
-
wheels = [
|
| 359 |
-
{ url = "https://files.pythonhosted.org/packages/a6/d6/ec0bf8fd29c907e68de69248e5dbc3e78b63a613163d54ebfc4ca9362421/nvidia_cudnn_cu11-9.5.0.50-py3-none-manylinux2014_x86_64.whl", hash = "sha256:15ddd1c8e2904f35debb4a0452f4df57eeb9d363e760de5a5c2ca0daba8e545a", size = 558442361 },
|
| 360 |
-
{ url = "https://files.pythonhosted.org/packages/2e/01/e7aeb8f9f2336a44c946e629eeb2d204ec672ecd94a47d3b1abaa10a05f1/nvidia_cudnn_cu11-9.5.0.50-py3-none-win_amd64.whl", hash = "sha256:ecf46a9ca869c1628f0ce91c160d53cbd28f0805dca856f85f3e65b4ef3496b1", size = 553719005 },
|
| 361 |
]
|
| 362 |
|
| 363 |
[[package]]
|
| 364 |
name = "nvidia-cudnn-cu12"
|
| 365 |
-
version = "
|
| 366 |
source = { registry = "https://pypi.org/simple" }
|
| 367 |
dependencies = [
|
| 368 |
{ name = "nvidia-cublas-cu12" },
|
| 369 |
]
|
| 370 |
wheels = [
|
| 371 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 372 |
-
]
|
| 373 |
-
|
| 374 |
-
[[package]]
|
| 375 |
-
name = "nvidia-cufft-cu11"
|
| 376 |
-
version = "10.9.0.58"
|
| 377 |
-
source = { registry = "https://pypi.org/simple" }
|
| 378 |
-
wheels = [
|
| 379 |
-
{ url = "https://files.pythonhosted.org/packages/74/79/b912a77e38e41f15a0581a59f5c3548d1ddfdda3225936fb67c342719e7a/nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux1_x86_64.whl", hash = "sha256:222f9da70c80384632fd6035e4c3f16762d64ea7a843829cb278f98b3cb7dd81", size = 168405414 },
|
| 380 |
-
{ url = "https://files.pythonhosted.org/packages/71/7a/a2ad9951d57c3cc23f4fa6d84b146afd9f375ffbc744b38935930ac4393f/nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux2014_aarch64.whl", hash = "sha256:34b7315104e615b230dc3c2d1861f13bff9ec465c5d3b4bb65b4986d03a1d8d4", size = 111231060 },
|
| 381 |
-
{ url = "https://files.pythonhosted.org/packages/64/c8/133717b43182ba063803e983e7680a94826a9f4ff5734af0ca315803f1b3/nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e21037259995243cc370dd63c430d77ae9280bedb68d5b5a18226bfc92e5d748", size = 168405419 },
|
| 382 |
-
{ url = "https://files.pythonhosted.org/packages/f8/b4/e432a74f8db0e84f734dc14d36c0e529225132bf7e239da21f55893351a6/nvidia_cufft_cu11-10.9.0.58-py3-none-win_amd64.whl", hash = "sha256:c4d316f17c745ec9c728e30409612eaf77a8404c3733cdf6c9c1569634d1ca03", size = 172237004 },
|
| 383 |
]
|
| 384 |
|
| 385 |
[[package]]
|
|
@@ -411,17 +352,6 @@ wheels = [
|
|
| 411 |
{ url = "https://files.pythonhosted.org/packages/bc/1d/8de1e5c67099015c834315e333911273a8c6aaba78923dd1d1e25fc5f217/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd", size = 124161928 },
|
| 412 |
]
|
| 413 |
|
| 414 |
-
[[package]]
|
| 415 |
-
name = "nvidia-cusparse-cu11"
|
| 416 |
-
version = "11.7.5.86"
|
| 417 |
-
source = { registry = "https://pypi.org/simple" }
|
| 418 |
-
wheels = [
|
| 419 |
-
{ url = "https://files.pythonhosted.org/packages/c1/e0/21b829c535d569831835a4ca5d049a19ba00d3e91f3e12ab4ad27bd7385f/nvidia_cusparse_cu11-11.7.5.86-py3-none-manylinux1_x86_64.whl", hash = "sha256:4ae709fe78d3f23f60acaba8c54b8ad556cf16ca486e0cc1aa92dca7555d2d2b", size = 204126221 },
|
| 420 |
-
{ url = "https://files.pythonhosted.org/packages/a2/6e/4eb2842e7ab1804072bca43030a8b92731e5a35f6a4a1b8f1aa8fa5f411c/nvidia_cusparse_cu11-11.7.5.86-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6c7da46abee7567e619d4aa2e90a1b032cfcbd1211d429853b1a6e87514a14b2", size = 203917797 },
|
| 421 |
-
{ url = "https://files.pythonhosted.org/packages/ed/5c/b0333b07c51ced77397c2fb0d9826072cea0da9d421aa7e792aa0f8ecc72/nvidia_cusparse_cu11-11.7.5.86-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8d7cf1628fd8d462b5d2ba6678fae34733a48ecb80495b9c68672ec6a6dde5ef", size = 204126227 },
|
| 422 |
-
{ url = "https://files.pythonhosted.org/packages/b8/36/a670e8ca1deccd3c63be4d0286491cf5c6375253f0d948e7cc5167fe1da9/nvidia_cusparse_cu11-11.7.5.86-py3-none-win_amd64.whl", hash = "sha256:a0f6ee81cd91be606fc2f55992d06b09cd4e86d74b6ae5e8dd1631cf7f5a8706", size = 203420667 },
|
| 423 |
-
]
|
| 424 |
-
|
| 425 |
[[package]]
|
| 426 |
name = "nvidia-cusparse-cu12"
|
| 427 |
version = "12.1.0.106"
|
|
@@ -433,20 +363,13 @@ wheels = [
|
|
| 433 |
{ url = "https://files.pythonhosted.org/packages/65/5b/cfaeebf25cd9fdec14338ccb16f6b2c4c7fa9163aefcf057d86b9cc248bb/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c", size = 195958278 },
|
| 434 |
]
|
| 435 |
|
| 436 |
-
[[package]]
|
| 437 |
-
name = "nvidia-nccl-cu11"
|
| 438 |
-
version = "2.21.5"
|
| 439 |
-
source = { registry = "https://pypi.org/simple" }
|
| 440 |
-
wheels = [
|
| 441 |
-
{ url = "https://files.pythonhosted.org/packages/ac/9a/8b6a28b3b87d5fddab0e92cd835339eb8fbddaa71ae67518c8c1b3d05bae/nvidia_nccl_cu11-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:49d8350629c7888701d1fd200934942671cb5c728f49acc5a0b3a768820bed29", size = 147811630 },
|
| 442 |
-
]
|
| 443 |
-
|
| 444 |
[[package]]
|
| 445 |
name = "nvidia-nccl-cu12"
|
| 446 |
-
version = "2.
|
| 447 |
source = { registry = "https://pypi.org/simple" }
|
| 448 |
wheels = [
|
| 449 |
-
{ url = "https://files.pythonhosted.org/packages/
|
|
|
|
| 450 |
]
|
| 451 |
|
| 452 |
[[package]]
|
|
@@ -480,79 +403,22 @@ wheels = [
|
|
| 480 |
]
|
| 481 |
|
| 482 |
[[package]]
|
| 483 |
-
name = "
|
| 484 |
-
version = "1.
|
| 485 |
-
source = { registry = "https://pypi.org/simple" }
|
| 486 |
-
dependencies = [
|
| 487 |
-
{ name = "accelerate" },
|
| 488 |
-
{ name = "diffusers" },
|
| 489 |
-
{ name = "torch" },
|
| 490 |
-
{ name = "transformers" },
|
| 491 |
-
]
|
| 492 |
-
sdist = { url = "https://files.pythonhosted.org/packages/d5/30/b493cbca73c5cdd2499a3ec37e451fa4c826e02be6bb6bc66c44d98293cc/onediff-1.2.0.tar.gz", hash = "sha256:8655b18698ca252093c06db53d4620de4065d1e507f9d7b1cbe3f123ed17bd26", size = 73823417 }
|
| 493 |
-
wheels = [
|
| 494 |
-
{ url = "https://files.pythonhosted.org/packages/3c/22/5b6de9dae4b8ed161bfcab9a7d6d7660cca4ee1d7db733950b67905f7f66/onediff-1.2.0-py3-none-any.whl", hash = "sha256:84bb2285ae7f38e450229779ab96b591e4f530e1f22b7320f41b06f7aec181a3", size = 104932 },
|
| 495 |
-
]
|
| 496 |
-
|
| 497 |
-
[[package]]
|
| 498 |
-
name = "onediffx"
|
| 499 |
-
version = "1.2.0"
|
| 500 |
source = { registry = "https://pypi.org/simple" }
|
| 501 |
-
dependencies = [
|
| 502 |
-
{ name = "accelerate" },
|
| 503 |
-
{ name = "diffusers" },
|
| 504 |
-
{ name = "omegaconf" },
|
| 505 |
-
{ name = "onefx" },
|
| 506 |
-
{ name = "torch" },
|
| 507 |
-
{ name = "transformers" },
|
| 508 |
-
]
|
| 509 |
-
sdist = { url = "https://files.pythonhosted.org/packages/b9/e1/b0797217862a9eb48bdb5f3e53ed8c9b69fe582de1e7c3a4b948f7ed862a/onediffx-1.2.0.tar.gz", hash = "sha256:3e0934e71f966d41d04951826ae36ebfb25be8c23ca7f5e6f7b177fd92f0791b", size = 95945 }
|
| 510 |
-
wheels = [
|
| 511 |
-
{ url = "https://files.pythonhosted.org/packages/f4/dd/412be6d3646355b5e5b75066f715fc34d0bbdcfbf18d0934b9f107c59239/onediffx-1.2.0-py3-none-any.whl", hash = "sha256:86dea026ac30bc04a2f4cd25ac4a511b1c4b4154657bc48c7e05f4a238085f14", size = 67899 },
|
| 512 |
-
]
|
| 513 |
-
|
| 514 |
-
[[package]]
|
| 515 |
-
name = "oneflow"
|
| 516 |
-
version = "0.9.1.dev20240802+cu118"
|
| 517 |
-
source = { url = "https://github.com/siliconflow/oneflow_releases/releases/download/community_cu118/oneflow-0.9.1.dev20240802%2Bcu118-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }
|
| 518 |
dependencies = [
|
| 519 |
{ name = "numpy" },
|
| 520 |
-
{ name = "nvidia-cublas-cu11" },
|
| 521 |
-
{ name = "nvidia-cudnn-cu11" },
|
| 522 |
-
{ name = "nvidia-cufft-cu11" },
|
| 523 |
-
{ name = "nvidia-cusparse-cu11" },
|
| 524 |
-
{ name = "nvidia-nccl-cu11" },
|
| 525 |
-
{ name = "pillow" },
|
| 526 |
{ name = "protobuf" },
|
| 527 |
-
{ name = "requests" },
|
| 528 |
-
{ name = "rich" },
|
| 529 |
-
{ name = "tqdm" },
|
| 530 |
]
|
|
|
|
| 531 |
wheels = [
|
| 532 |
-
{ url = "https://
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
{ name = "numpy", specifier = ">=1.22.1" },
|
| 538 |
-
{ name = "nvidia-cublas-cu11" },
|
| 539 |
-
{ name = "nvidia-cudnn-cu11" },
|
| 540 |
-
{ name = "nvidia-cufft-cu11" },
|
| 541 |
-
{ name = "nvidia-cusparse-cu11" },
|
| 542 |
-
{ name = "nvidia-nccl-cu11" },
|
| 543 |
-
{ name = "pillow" },
|
| 544 |
-
{ name = "protobuf", specifier = ">=3.9.2" },
|
| 545 |
-
{ name = "requests" },
|
| 546 |
-
{ name = "rich" },
|
| 547 |
-
{ name = "tqdm" },
|
| 548 |
]
|
| 549 |
|
| 550 |
-
[[package]]
|
| 551 |
-
name = "onefx"
|
| 552 |
-
version = "0.0.3"
|
| 553 |
-
source = { registry = "https://pypi.org/simple" }
|
| 554 |
-
sdist = { url = "https://files.pythonhosted.org/packages/3c/c4/2dc5fd8ab613d32e24d54204b4d448b315fa50e5f1ce3197938f2219fa09/onefx-0.0.3.tar.gz", hash = "sha256:d3f3f816891cdfdb6d69e4a78b72d5cafd7d40e165ce419f89f855f47a3dbe34", size = 147260 }
|
| 555 |
-
|
| 556 |
[[package]]
|
| 557 |
name = "packaging"
|
| 558 |
version = "24.1"
|
|
@@ -588,6 +454,14 @@ wheels = [
|
|
| 588 |
{ url = "https://files.pythonhosted.org/packages/ec/3d/c32a51d848401bd94cabb8767a39621496491ee7cd5199856b77da9b18ad/pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316", size = 2567508 },
|
| 589 |
]
|
| 590 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
[[package]]
|
| 592 |
name = "protobuf"
|
| 593 |
version = "5.28.3"
|
|
@@ -662,15 +536,6 @@ wheels = [
|
|
| 662 |
{ url = "https://files.pythonhosted.org/packages/a9/f9/b6bcaf874f410564a78908739c80861a171788ef4d4f76f5009656672dfe/pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753", size = 1920344 },
|
| 663 |
]
|
| 664 |
|
| 665 |
-
[[package]]
|
| 666 |
-
name = "pygments"
|
| 667 |
-
version = "2.18.0"
|
| 668 |
-
source = { registry = "https://pypi.org/simple" }
|
| 669 |
-
sdist = { url = "https://files.pythonhosted.org/packages/8e/62/8336eff65bcbc8e4cb5d05b55faf041285951b6e80f33e2bff2024788f31/pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199", size = 4891905 }
|
| 670 |
-
wheels = [
|
| 671 |
-
{ url = "https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a", size = 1205513 },
|
| 672 |
-
]
|
| 673 |
-
|
| 674 |
[[package]]
|
| 675 |
name = "pyyaml"
|
| 676 |
version = "6.0.2"
|
|
@@ -727,20 +592,6 @@ wheels = [
|
|
| 727 |
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
|
| 728 |
]
|
| 729 |
|
| 730 |
-
[[package]]
|
| 731 |
-
name = "rich"
|
| 732 |
-
version = "13.9.3"
|
| 733 |
-
source = { registry = "https://pypi.org/simple" }
|
| 734 |
-
dependencies = [
|
| 735 |
-
{ name = "markdown-it-py" },
|
| 736 |
-
{ name = "pygments" },
|
| 737 |
-
{ name = "typing-extensions" },
|
| 738 |
-
]
|
| 739 |
-
sdist = { url = "https://files.pythonhosted.org/packages/d9/e9/cf9ef5245d835065e6673781dbd4b8911d352fb770d56cf0879cf11b7ee1/rich-13.9.3.tar.gz", hash = "sha256:bc1e01b899537598cf02579d2b9f4a415104d3fc439313a7a2c165d76557a08e", size = 222889 }
|
| 740 |
-
wheels = [
|
| 741 |
-
{ url = "https://files.pythonhosted.org/packages/9a/e2/10e9819cf4a20bd8ea2f5dabafc2e6bf4a78d6a0965daeb60a4b34d1c11f/rich-13.9.3-py3-none-any.whl", hash = "sha256:9836f5096eb2172c9e77df411c1b009bace4193d6a481d534fea75ebba758283", size = 242157 },
|
| 742 |
-
]
|
| 743 |
-
|
| 744 |
[[package]]
|
| 745 |
name = "safetensors"
|
| 746 |
version = "0.4.5"
|
|
@@ -778,41 +629,50 @@ wheels = [
|
|
| 778 |
]
|
| 779 |
|
| 780 |
[[package]]
|
| 781 |
-
name = "
|
| 782 |
-
version = "1.
|
| 783 |
-
source = {
|
| 784 |
dependencies = [
|
| 785 |
-
{ name = "
|
| 786 |
]
|
|
|
|
| 787 |
wheels = [
|
| 788 |
-
{ url = "https://
|
| 789 |
-
]
|
| 790 |
-
|
| 791 |
-
[package.metadata]
|
| 792 |
-
requires-dist = [
|
| 793 |
-
{ name = "diffusers", marker = "extra == 'diffusers'", specifier = ">=0.19.0" },
|
| 794 |
-
{ name = "numpy", marker = "extra == 'dev'" },
|
| 795 |
-
{ name = "opencv-python", marker = "extra == 'dev'" },
|
| 796 |
-
{ name = "pillow", marker = "extra == 'dev'" },
|
| 797 |
-
{ name = "prettytable", marker = "extra == 'dev'" },
|
| 798 |
-
{ name = "pytest", marker = "extra == 'dev'" },
|
| 799 |
-
{ name = "torch" },
|
| 800 |
-
{ name = "transformers", marker = "extra == 'diffusers'" },
|
| 801 |
-
{ name = "triton", marker = "extra == 'triton'", specifier = ">=2.1.0" },
|
| 802 |
-
{ name = "xformers", marker = "extra == 'xformers'", specifier = ">=0.0.20" },
|
| 803 |
]
|
| 804 |
|
| 805 |
[[package]]
|
| 806 |
-
name = "
|
| 807 |
-
version = "
|
| 808 |
source = { registry = "https://pypi.org/simple" }
|
| 809 |
dependencies = [
|
| 810 |
-
{ name = "
|
| 811 |
]
|
| 812 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
wheels = [
|
| 814 |
-
{ url = "https://files.pythonhosted.org/packages/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
]
|
|
|
|
| 816 |
|
| 817 |
[[package]]
|
| 818 |
name = "tokenizers"
|
|
@@ -844,21 +704,9 @@ wheels = [
|
|
| 844 |
{ url = "https://files.pythonhosted.org/packages/45/b6/36c1bb106bbe96012c9367df89ed01599cada036c0b96d38fbbdbeb75c9f/tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:43350270bfc16b06ad3f6f07eab21f089adb835544417afda0f83256a8bf8b75", size = 9945103 },
|
| 845 |
]
|
| 846 |
|
| 847 |
-
[[package]]
|
| 848 |
-
name = "tomesd"
|
| 849 |
-
version = "0.1.3"
|
| 850 |
-
source = { registry = "https://pypi.org/simple" }
|
| 851 |
-
dependencies = [
|
| 852 |
-
{ name = "torch" },
|
| 853 |
-
]
|
| 854 |
-
sdist = { url = "https://files.pythonhosted.org/packages/29/37/ed74c7449fe5a8a4726be3dab4d879d000babf6ea538658171933b1f000e/tomesd-0.1.3.tar.gz", hash = "sha256:15bba2e952f4643c8355951e892fda918ddccbdff2238dc368d42bd078fcedc9", size = 14032 }
|
| 855 |
-
wheels = [
|
| 856 |
-
{ url = "https://files.pythonhosted.org/packages/0c/02/367c67c8f510313f143a7818e92254a5f861c7d94c98ad6a08d25db52fee/tomesd-0.1.3-py3-none-any.whl", hash = "sha256:3d5aa0857fe2c2aab253891050601ca13a87d8d7a99b6760b9ca0856aa0c6355", size = 11467 },
|
| 857 |
-
]
|
| 858 |
-
|
| 859 |
[[package]]
|
| 860 |
name = "torch"
|
| 861 |
-
version = "2.
|
| 862 |
source = { registry = "https://pypi.org/simple" }
|
| 863 |
dependencies = [
|
| 864 |
{ name = "filelock" },
|
|
@@ -881,28 +729,10 @@ dependencies = [
|
|
| 881 |
{ name = "typing-extensions" },
|
| 882 |
]
|
| 883 |
wheels = [
|
| 884 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 885 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 886 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 887 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 888 |
-
{ url = "https://files.pythonhosted.org/packages/33/6b/21496316c9b8242749ee2a9064406271efdf979e91d440e8a3806b5e84bf/torch-2.2.2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:877b3e6593b5e00b35bbe111b7057464e76a7dd186a287280d941b564b0563c2", size = 59707286 },
|
| 889 |
-
]
|
| 890 |
-
|
| 891 |
-
[[package]]
|
| 892 |
-
name = "torchvision"
|
| 893 |
-
version = "0.17.2"
|
| 894 |
-
source = { registry = "https://pypi.org/simple" }
|
| 895 |
-
dependencies = [
|
| 896 |
-
{ name = "numpy" },
|
| 897 |
-
{ name = "pillow" },
|
| 898 |
-
{ name = "torch" },
|
| 899 |
-
]
|
| 900 |
-
wheels = [
|
| 901 |
-
{ url = "https://files.pythonhosted.org/packages/a2/70/c781e0433ab7b8f6f693580e5065ae17c3785b2836200311765f99535ef8/torchvision-0.17.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:1f2910fe3c21ad6875b2720d46fad835b2e4b336e9553d31ca364d24c90b1d4f", size = 1666426 },
|
| 902 |
-
{ url = "https://files.pythonhosted.org/packages/64/3d/a0385fd301e6e2eefb1d9cf2ac97d0c33ebf4a764aba0f066e6e16324916/torchvision-0.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ecc1c503fa8a54fbab777e06a7c228032b8ab78efebf35b28bc8f22f544f51f1", size = 1571154 },
|
| 903 |
-
{ url = "https://files.pythonhosted.org/packages/e0/2f/d13cb0ffc4808f85b880ef66ab6cfef10bd35e5c151dae68ea18cf6bf636/torchvision-0.17.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:f400145fc108833e7c2fc28486a04989ca742146d7a2a2cc48878ebbb40cdbbd", size = 6915896 },
|
| 904 |
-
{ url = "https://files.pythonhosted.org/packages/fb/41/c8a440ebb1d4958baf02f08f6df56a3a30bea3fdcdc99076cb7da023babe/torchvision-0.17.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e9e4bed404af33dfc92eecc2b513d21ddc4c242a7fd8708b3b09d3a26aa6f444", size = 14008514 },
|
| 905 |
-
{ url = "https://files.pythonhosted.org/packages/0a/b8/027b3c36e61a26d14d4e9a8413d4a09e5fd8d3e01e3efce78447ca1dc3dd/torchvision-0.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:ba2e62f233eab3d42b648c122a3a29c47cc108ca314dfd5cbb59cd3a143fd623", size = 1165527 },
|
| 906 |
]
|
| 907 |
|
| 908 |
[[package]]
|
|
@@ -940,13 +770,13 @@ wheels = [
|
|
| 940 |
|
| 941 |
[[package]]
|
| 942 |
name = "triton"
|
| 943 |
-
version = "
|
| 944 |
source = { registry = "https://pypi.org/simple" }
|
| 945 |
dependencies = [
|
| 946 |
{ name = "filelock" },
|
| 947 |
]
|
| 948 |
wheels = [
|
| 949 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 950 |
]
|
| 951 |
|
| 952 |
[[package]]
|
|
@@ -968,17 +798,12 @@ wheels = [
|
|
| 968 |
]
|
| 969 |
|
| 970 |
[[package]]
|
| 971 |
-
name = "
|
| 972 |
-
version = "0.0
|
| 973 |
source = { registry = "https://pypi.org/simple" }
|
| 974 |
-
|
| 975 |
-
{ name = "numpy" },
|
| 976 |
-
{ name = "torch" },
|
| 977 |
-
]
|
| 978 |
-
sdist = { url = "https://files.pythonhosted.org/packages/ca/66/f5977922658ff2aea8b3222901fa0b9922778988c6d3a428cfd083892191/xformers-0.0.25.post1.tar.gz", hash = "sha256:397430bd0162fd5a75eb8bc50b0ba242200881e48fd6404a19376f853f8c0444", size = 4083274 }
|
| 979 |
wheels = [
|
| 980 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 981 |
-
{ url = "https://files.pythonhosted.org/packages/25/19/301789926809dc167ac104c29e26703369b3d47e6c1a2861db9efccbdc10/xformers-0.0.25.post1-cp310-cp310-win_amd64.whl", hash = "sha256:ddc22273f2ff06b886d9e86f17997e4f1f3074fdeb5d46bcdf50b704430df528", size = 208697268 },
|
| 982 |
]
|
| 983 |
|
| 984 |
[[package]]
|
|
|
|
| 34 |
source = { registry = "https://pypi.org/simple" }
|
| 35 |
sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034 }
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
[[package]]
|
| 38 |
name = "certifi"
|
| 39 |
version = "2024.8.30"
|
|
|
|
| 76 |
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
|
| 77 |
]
|
| 78 |
|
| 79 |
+
[[package]]
|
| 80 |
+
name = "cuda-python"
|
| 81 |
+
version = "12.6.0"
|
| 82 |
+
source = { registry = "https://pypi.org/simple" }
|
| 83 |
+
wheels = [
|
| 84 |
+
{ url = "https://files.pythonhosted.org/packages/0b/a3/ad3148d068d78e8ad1e40094ab787338ea4bef06fbe2915cf1557a5c5f98/cuda_python-12.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dee03e2ba78a807a907a7939dddf089bb8a780faaf7ccbcbfc2461090af11e78", size = 23793330 },
|
| 85 |
+
{ url = "https://files.pythonhosted.org/packages/86/93/f00a5f48eb67216d8a8818b93c0e8bbe5949f297add3367522081ec5223c/cuda_python-12.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e177f584094d9c9fd9c7d153168486a3966765c79cb2a80e86feb15e3b5adc14", size = 24223726 },
|
| 86 |
+
{ url = "https://files.pythonhosted.org/packages/f6/e0/c2302ff6796eac6c6f1e1414f163c6a38deba62af0b7df2b77562656188c/cuda_python-12.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:3b1e9711c6455fabd947076d52eb21ea508ade95eb4dd33838b0339a84238125", size = 9995130 },
|
| 87 |
+
]
|
| 88 |
+
|
| 89 |
[[package]]
|
| 90 |
name = "diffusers"
|
| 91 |
+
version = "0.30.2"
|
| 92 |
source = { registry = "https://pypi.org/simple" }
|
| 93 |
dependencies = [
|
| 94 |
{ name = "filelock" },
|
|
|
|
| 100 |
{ name = "requests" },
|
| 101 |
{ name = "safetensors" },
|
| 102 |
]
|
| 103 |
+
sdist = { url = "https://files.pythonhosted.org/packages/04/ee/13a6327f04f21420ab4d8ada635aba7d884bf57b09f9b847b9af3818b348/diffusers-0.30.2.tar.gz", hash = "sha256:641875f78f36bdfa4b9af752b124d1fd6d431eadd5547fe0a3f354ae0af2636c", size = 2095560 }
|
| 104 |
wheels = [
|
| 105 |
+
{ url = "https://files.pythonhosted.org/packages/2f/ee/f67b0888229be96a276257579a58eb2331733d246fdb8620e09ca7253971/diffusers-0.30.2-py3-none-any.whl", hash = "sha256:739826043147c2b59560944591dfdea5d24cd4fb15e751abbe20679a289bece8", size = 2636928 },
|
| 106 |
]
|
| 107 |
|
| 108 |
[[package]]
|
|
|
|
| 111 |
source = { editable = "." }
|
| 112 |
dependencies = [
|
| 113 |
{ name = "accelerate" },
|
| 114 |
+
{ name = "cuda-python" },
|
| 115 |
{ name = "diffusers" },
|
| 116 |
{ name = "edge-maxxing-pipelines" },
|
|
|
|
|
|
|
| 117 |
{ name = "omegaconf" },
|
| 118 |
+
{ name = "onnx" },
|
| 119 |
+
{ name = "polygraphy" },
|
|
|
|
| 120 |
{ name = "setuptools" },
|
| 121 |
+
{ name = "tensorrt" },
|
| 122 |
+
{ name = "tensorrt-cu12-bindings" },
|
| 123 |
+
{ name = "tensorrt-cu12-libs" },
|
| 124 |
{ name = "torch" },
|
|
|
|
| 125 |
{ name = "transformers" },
|
| 126 |
+
{ name = "wheel" },
|
|
|
|
| 127 |
]
|
| 128 |
|
| 129 |
[package.metadata]
|
| 130 |
requires-dist = [
|
| 131 |
{ name = "accelerate", specifier = "==0.31.0" },
|
| 132 |
+
{ name = "cuda-python", specifier = ">=12.6.0" },
|
| 133 |
+
{ name = "diffusers", specifier = "==0.30.2" },
|
| 134 |
+
{ name = "edge-maxxing-pipelines", git = "https://github.com/womboai/edge-maxxing?subdirectory=pipelines&rev=8d8ff45863416484b5b4bc547782591bbdfc696a#8d8ff45863416484b5b4bc547782591bbdfc696a" },
|
|
|
|
|
|
|
| 135 |
{ name = "omegaconf", specifier = "==2.3.0" },
|
| 136 |
+
{ name = "onnx" },
|
| 137 |
+
{ name = "polygraphy" },
|
|
|
|
| 138 |
{ name = "setuptools", specifier = ">=75.2.0" },
|
| 139 |
+
{ name = "tensorrt", specifier = ">=10.5.0" },
|
| 140 |
+
{ name = "tensorrt-cu12-bindings", specifier = ">=10.5.0" },
|
| 141 |
+
{ name = "tensorrt-cu12-libs", specifier = ">=10.5.0" },
|
| 142 |
+
{ name = "torch", specifier = "==2.4.1" },
|
| 143 |
{ name = "transformers", specifier = "==4.41.2" },
|
| 144 |
+
{ name = "wheel" },
|
|
|
|
| 145 |
]
|
| 146 |
|
| 147 |
[[package]]
|
| 148 |
name = "edge-maxxing-pipelines"
|
| 149 |
version = "1.0.0"
|
| 150 |
+
source = { git = "https://github.com/womboai/edge-maxxing?subdirectory=pipelines&rev=8d8ff45863416484b5b4bc547782591bbdfc696a#8d8ff45863416484b5b4bc547782591bbdfc696a" }
|
| 151 |
dependencies = [
|
| 152 |
{ name = "pydantic" },
|
| 153 |
]
|
|
|
|
| 172 |
|
| 173 |
[[package]]
|
| 174 |
name = "huggingface-hub"
|
| 175 |
+
version = "0.26.1"
|
| 176 |
source = { registry = "https://pypi.org/simple" }
|
| 177 |
dependencies = [
|
| 178 |
{ name = "filelock" },
|
|
|
|
| 183 |
{ name = "tqdm" },
|
| 184 |
{ name = "typing-extensions" },
|
| 185 |
]
|
| 186 |
+
sdist = { url = "https://files.pythonhosted.org/packages/44/99/c8fdef6fe09a1719e5e5de24b012de5824889168c96143f5531cab5af42b/huggingface_hub-0.26.1.tar.gz", hash = "sha256:414c0d9b769eecc86c70f9d939d0f48bb28e8461dd1130021542eff0212db890", size = 375458 }
|
| 187 |
wheels = [
|
| 188 |
+
{ url = "https://files.pythonhosted.org/packages/d7/4d/017d8d7cff5100092da8ea19139bcb1965bbadcbb5ddd0480e2badc299e8/huggingface_hub-0.26.1-py3-none-any.whl", hash = "sha256:5927a8fc64ae68859cd954b7cc29d1c8390a5e15caba6d3d349c973be8fdacf3", size = 447439 },
|
| 189 |
]
|
| 190 |
|
| 191 |
[[package]]
|
|
|
|
| 221 |
{ url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
|
| 222 |
]
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
[[package]]
|
| 225 |
name = "markupsafe"
|
| 226 |
version = "3.0.2"
|
|
|
|
| 239 |
{ url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514 },
|
| 240 |
]
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
[[package]]
|
| 243 |
name = "mpmath"
|
| 244 |
version = "1.3.0"
|
|
|
|
| 259 |
|
| 260 |
[[package]]
|
| 261 |
name = "numpy"
|
| 262 |
+
version = "2.1.2"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
source = { registry = "https://pypi.org/simple" }
|
| 264 |
+
sdist = { url = "https://files.pythonhosted.org/packages/4b/d1/8a730ea07f4a37d94f9172f4ce1d81064b7a64766b460378be278952de75/numpy-2.1.2.tar.gz", hash = "sha256:13532a088217fa624c99b843eeb54640de23b3414b14aa66d023805eb731066c", size = 18878063 }
|
| 265 |
wheels = [
|
| 266 |
+
{ url = "https://files.pythonhosted.org/packages/1c/a2/40a76d357f168e9f9f06d6cc2c8e22dd5fb2bfbe63fe2c433057258c145a/numpy-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:30d53720b726ec36a7f88dc873f0eec8447fbc93d93a8f079dfac2629598d6ee", size = 21150947 },
|
| 267 |
+
{ url = "https://files.pythonhosted.org/packages/b5/d0/ba271ea9108d7278d3889a7eb38d77370a88713fb94339964e71ac184d4a/numpy-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d3ca0a72dd8846eb6f7dfe8f19088060fcb76931ed592d29128e0219652884", size = 13758184 },
|
| 268 |
+
{ url = "https://files.pythonhosted.org/packages/7c/b9/5c6507439cd756201010f7937bf90712c2469052ae094584af14557dd64f/numpy-2.1.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648", size = 5354091 },
|
| 269 |
+
{ url = "https://files.pythonhosted.org/packages/60/21/7938cf724d9e84e45fb886f3fc794ab431d71facfebc261e3e9f19f3233a/numpy-2.1.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7c1c60328bd964b53f8b835df69ae8198659e2b9302ff9ebb7de4e5a5994db3d", size = 6887169 },
|
| 270 |
+
{ url = "https://files.pythonhosted.org/packages/09/8d/42a124657f5d31902fca73921b25a0d022cead2b32ce7e6975762cd2995a/numpy-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cdb606a7478f9ad91c6283e238544451e3a95f30fb5467fbf715964341a8a86", size = 13888165 },
|
| 271 |
+
{ url = "https://files.pythonhosted.org/packages/fb/25/ba023652a39a2c127200e85aed975fc6119b421e2c348e5d0171e2046edb/numpy-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d666cb72687559689e9906197e3bec7b736764df6a2e58ee265e360663e9baf7", size = 16326954 },
|
| 272 |
+
{ url = "https://files.pythonhosted.org/packages/34/58/23e6b07fad492b7c47cf09cd8bad6983658f0f925b6c535fd008e3e86274/numpy-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6eef7a2dbd0abfb0d9eaf78b73017dbfd0b54051102ff4e6a7b2980d5ac1a03", size = 16702916 },
|
| 273 |
+
{ url = "https://files.pythonhosted.org/packages/91/24/37b5cf2dc7d385ac97f7b7fe50cba312abb70a2a5eac74c23af028811f73/numpy-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:12edb90831ff481f7ef5f6bc6431a9d74dc0e5ff401559a71e5e4611d4f2d466", size = 14384372 },
|
| 274 |
+
{ url = "https://files.pythonhosted.org/packages/ea/ec/0f6d471058a01d1a05a50d2793898de1549280fa715a8537987ee866b5d9/numpy-2.1.2-cp310-cp310-win32.whl", hash = "sha256:a65acfdb9c6ebb8368490dbafe83c03c7e277b37e6857f0caeadbbc56e12f4fb", size = 6535361 },
|
| 275 |
+
{ url = "https://files.pythonhosted.org/packages/c2/3d/293cc5927f916a7bc6bf74da8f6defab63d1b13f0959d7e21878ad8a20d8/numpy-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:860ec6e63e2c5c2ee5e9121808145c7bf86c96cca9ad396c0bd3e0f2798ccbe2", size = 12865501 },
|
| 276 |
+
{ url = "https://files.pythonhosted.org/packages/73/c9/3e1d6bbe6d3d2e2c5a9483b24b2f29a229b323f62054278a3bba7fee11e5/numpy-2.1.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bdd407c40483463898b84490770199d5714dcc9dd9b792f6c6caccc523c00952", size = 20981945 },
|
| 277 |
+
{ url = "https://files.pythonhosted.org/packages/6e/62/989c4988bde1a8e08117fccc3bab73d2886421fb98cde597168714f3c54e/numpy-2.1.2-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:da65fb46d4cbb75cb417cddf6ba5e7582eb7bb0b47db4b99c9fe5787ce5d91f5", size = 6750558 },
|
| 278 |
+
{ url = "https://files.pythonhosted.org/packages/53/b1/00ef9f30975f1312a53257f68e57b4513d14d537e03d507e2773a684b1e8/numpy-2.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c193d0b0238638e6fc5f10f1b074a6993cb13b0b431f64079a509d63d3aa8b7", size = 16141552 },
|
| 279 |
+
{ url = "https://files.pythonhosted.org/packages/c0/ec/0c04903b48dfea6be1d7b47ba70f98709fb7198fd970784a1400c391d522/numpy-2.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a7d80b2e904faa63068ead63107189164ca443b42dd1930299e0d1cb041cec2e", size = 12789924 },
|
| 280 |
]
|
| 281 |
|
| 282 |
[[package]]
|
|
|
|
| 309 |
source = { registry = "https://pypi.org/simple" }
|
| 310 |
wheels = [
|
| 311 |
{ url = "https://files.pythonhosted.org/packages/eb/d5/c68b1d2cdfcc59e72e8a5949a37ddb22ae6cade80cd4a57a84d4c8b55472/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40", size = 823596 },
|
| 312 |
+
{ url = "https://files.pythonhosted.org/packages/9f/e2/7a2b4b5064af56ea8ea2d8b2776c0f2960d95c88716138806121ae52a9c9/nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344", size = 821226 },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
]
|
| 314 |
|
| 315 |
[[package]]
|
| 316 |
name = "nvidia-cudnn-cu12"
|
| 317 |
+
version = "9.1.0.70"
|
| 318 |
source = { registry = "https://pypi.org/simple" }
|
| 319 |
dependencies = [
|
| 320 |
{ name = "nvidia-cublas-cu12" },
|
| 321 |
]
|
| 322 |
wheels = [
|
| 323 |
+
{ url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
]
|
| 325 |
|
| 326 |
[[package]]
|
|
|
|
| 352 |
{ url = "https://files.pythonhosted.org/packages/bc/1d/8de1e5c67099015c834315e333911273a8c6aaba78923dd1d1e25fc5f217/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd", size = 124161928 },
|
| 353 |
]
|
| 354 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
[[package]]
|
| 356 |
name = "nvidia-cusparse-cu12"
|
| 357 |
version = "12.1.0.106"
|
|
|
|
| 363 |
{ url = "https://files.pythonhosted.org/packages/65/5b/cfaeebf25cd9fdec14338ccb16f6b2c4c7fa9163aefcf057d86b9cc248bb/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c", size = 195958278 },
|
| 364 |
]
|
| 365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
[[package]]
|
| 367 |
name = "nvidia-nccl-cu12"
|
| 368 |
+
version = "2.20.5"
|
| 369 |
source = { registry = "https://pypi.org/simple" }
|
| 370 |
wheels = [
|
| 371 |
+
{ url = "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01", size = 176238458 },
|
| 372 |
+
{ url = "https://files.pythonhosted.org/packages/4b/2a/0a131f572aa09f741c30ccd45a8e56316e8be8dfc7bc19bf0ab7cfef7b19/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56", size = 176249402 },
|
| 373 |
]
|
| 374 |
|
| 375 |
[[package]]
|
|
|
|
| 403 |
]
|
| 404 |
|
| 405 |
[[package]]
|
| 406 |
+
name = "onnx"
|
| 407 |
+
version = "1.17.0"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
source = { registry = "https://pypi.org/simple" }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
dependencies = [
|
| 410 |
{ name = "numpy" },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
{ name = "protobuf" },
|
|
|
|
|
|
|
|
|
|
| 412 |
]
|
| 413 |
+
sdist = { url = "https://files.pythonhosted.org/packages/9a/54/0e385c26bf230d223810a9c7d06628d954008a5e5e4b73ee26ef02327282/onnx-1.17.0.tar.gz", hash = "sha256:48ca1a91ff73c1d5e3ea2eef20ae5d0e709bb8a2355ed798ffc2169753013fd3", size = 12165120 }
|
| 414 |
wheels = [
|
| 415 |
+
{ url = "https://files.pythonhosted.org/packages/2e/29/57053ba7787788ac75efb095cfc1ae290436b6d3a26754693cd7ed1b4fac/onnx-1.17.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:38b5df0eb22012198cdcee527cc5f917f09cce1f88a69248aaca22bd78a7f023", size = 16645616 },
|
| 416 |
+
{ url = "https://files.pythonhosted.org/packages/75/0d/831807a18db2a5e8f7813848c59272b904a4ef3939fe4d1288cbce9ea735/onnx-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d545335cb49d4d8c47cc803d3a805deb7ad5d9094dc67657d66e568610a36d7d", size = 15908420 },
|
| 417 |
+
{ url = "https://files.pythonhosted.org/packages/dd/5b/c4f95dbe652d14aeba9afaceb177e9ffc48ac3c03048dd3f872f26f07e34/onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3193a3672fc60f1a18c0f4c93ac81b761bc72fd8a6c2035fa79ff5969f07713e", size = 16046244 },
|
| 418 |
+
{ url = "https://files.pythonhosted.org/packages/08/a9/c1f218085043dccc6311460239e253fa6957cf12ee4b0a56b82014938d0b/onnx-1.17.0-cp310-cp310-win32.whl", hash = "sha256:0141c2ce806c474b667b7e4499164227ef594584da432fd5613ec17c1855e311", size = 14423516 },
|
| 419 |
+
{ url = "https://files.pythonhosted.org/packages/0e/d3/d26ebf590a65686dde6b27fef32493026c5be9e42083340d947395f93405/onnx-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:dfd777d95c158437fda6b34758f0877d15b89cbe9ff45affbedc519b35345cf9", size = 14528496 },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
]
|
| 421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
[[package]]
|
| 423 |
name = "packaging"
|
| 424 |
version = "24.1"
|
|
|
|
| 454 |
{ url = "https://files.pythonhosted.org/packages/ec/3d/c32a51d848401bd94cabb8767a39621496491ee7cd5199856b77da9b18ad/pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316", size = 2567508 },
|
| 455 |
]
|
| 456 |
|
| 457 |
+
[[package]]
|
| 458 |
+
name = "polygraphy"
|
| 459 |
+
version = "0.49.9"
|
| 460 |
+
source = { registry = "https://pypi.org/simple" }
|
| 461 |
+
wheels = [
|
| 462 |
+
{ url = "https://files.pythonhosted.org/packages/4a/f5/a2b20c677c1a856cc9e08cd0b5a5105450ed5253e369e938ddd31d91c547/polygraphy-0.49.9-py2.py3-none-any.whl", hash = "sha256:62ae22825efdd3288222e5b1d2d791fe58e87844fcd848bcd1251fbce02ba956", size = 346910 },
|
| 463 |
+
]
|
| 464 |
+
|
| 465 |
[[package]]
|
| 466 |
name = "protobuf"
|
| 467 |
version = "5.28.3"
|
|
|
|
| 536 |
{ url = "https://files.pythonhosted.org/packages/a9/f9/b6bcaf874f410564a78908739c80861a171788ef4d4f76f5009656672dfe/pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753", size = 1920344 },
|
| 537 |
]
|
| 538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
[[package]]
|
| 540 |
name = "pyyaml"
|
| 541 |
version = "6.0.2"
|
|
|
|
| 592 |
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
|
| 593 |
]
|
| 594 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 595 |
[[package]]
|
| 596 |
name = "safetensors"
|
| 597 |
version = "0.4.5"
|
|
|
|
| 629 |
]
|
| 630 |
|
| 631 |
[[package]]
|
| 632 |
+
name = "sympy"
|
| 633 |
+
version = "1.13.3"
|
| 634 |
+
source = { registry = "https://pypi.org/simple" }
|
| 635 |
dependencies = [
|
| 636 |
+
{ name = "mpmath" },
|
| 637 |
]
|
| 638 |
+
sdist = { url = "https://files.pythonhosted.org/packages/11/8a/5a7fd6284fa8caac23a26c9ddf9c30485a48169344b4bd3b0f02fef1890f/sympy-1.13.3.tar.gz", hash = "sha256:b27fd2c6530e0ab39e275fc9b683895367e51d5da91baa8d3d64db2565fec4d9", size = 7533196 }
|
| 639 |
wheels = [
|
| 640 |
+
{ url = "https://files.pythonhosted.org/packages/99/ff/c87e0622b1dadea79d2fb0b25ade9ed98954c9033722eb707053d310d4f3/sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73", size = 6189483 },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 641 |
]
|
| 642 |
|
| 643 |
[[package]]
|
| 644 |
+
name = "tensorrt"
|
| 645 |
+
version = "10.5.0"
|
| 646 |
source = { registry = "https://pypi.org/simple" }
|
| 647 |
dependencies = [
|
| 648 |
+
{ name = "tensorrt-cu12" },
|
| 649 |
]
|
| 650 |
+
sdist = { url = "https://files.pythonhosted.org/packages/ee/b9/f917eb7dfe02da30bc91206a464c850f4b94a1e14b8f95870074c9b9abea/tensorrt-10.5.0.tar.gz", hash = "sha256:d5c6338d44aeda20250fdbe31f9df8ca152b830f811aaf19d6c4d1dafd18c84b", size = 16401 }
|
| 651 |
+
|
| 652 |
+
[[package]]
|
| 653 |
+
name = "tensorrt-cu12"
|
| 654 |
+
version = "10.5.0"
|
| 655 |
+
source = { registry = "https://pypi.org/simple" }
|
| 656 |
+
sdist = { url = "https://files.pythonhosted.org/packages/22/d5/a4c3e22482d4273e151123990934d7c8d0ba1e4efb9a483eba807cdce279/tensorrt-cu12-10.5.0.tar.gz", hash = "sha256:46edbda08c54c8ffa88c75d75b4761eb9839e81678135e8d1530adc8cef6a61b", size = 18341 }
|
| 657 |
+
|
| 658 |
+
[[package]]
|
| 659 |
+
name = "tensorrt-cu12-bindings"
|
| 660 |
+
version = "10.5.0"
|
| 661 |
+
source = { registry = "https://pypi.org/simple" }
|
| 662 |
wheels = [
|
| 663 |
+
{ url = "https://files.pythonhosted.org/packages/21/be/cab39a2c387887fa87bb8f199d113a10ebd0ba8b052927c2ae43b1495cf6/tensorrt_cu12_bindings-10.5.0-cp310-none-manylinux_2_17_x86_64.whl", hash = "sha256:45a31cc3f25489bb05fc9cb8dae0e63b205bf3da1656c44430f97cf263d5720c", size = 1117215 },
|
| 664 |
+
{ url = "https://files.pythonhosted.org/packages/02/49/36db3b3c0bd0c7dc68964c75b1691b46abe8388708b4da04c3261f8ab7c0/tensorrt_cu12_bindings-10.5.0-cp310-none-manylinux_2_31_aarch64.whl", hash = "sha256:900b87824ebbc9e1059a4a9a5ed3040eb9d74ba9a601674086030d373996692a", size = 1091646 },
|
| 665 |
+
{ url = "https://files.pythonhosted.org/packages/17/df/e95a92fa4d43df918cc8bc681697b1423a988db339af25bafe25068c522d/tensorrt_cu12_bindings-10.5.0-cp310-none-win_amd64.whl", hash = "sha256:2bf2eb6d36ed9fe44a4b416def538775012abec34fdb5a6fb8461dd569717055", size = 769305 },
|
| 666 |
+
]
|
| 667 |
+
|
| 668 |
+
[[package]]
|
| 669 |
+
name = "tensorrt-cu12-libs"
|
| 670 |
+
version = "10.5.0"
|
| 671 |
+
source = { registry = "https://pypi.org/simple" }
|
| 672 |
+
dependencies = [
|
| 673 |
+
{ name = "nvidia-cuda-runtime-cu12" },
|
| 674 |
]
|
| 675 |
+
sdist = { url = "https://files.pythonhosted.org/packages/ff/d2/28d4bdadcb4690e7c051ae23ac5559dffca7ee6bf859ea76c9ab9931ba53/tensorrt_cu12_libs-10.5.0.tar.gz", hash = "sha256:358b3a36c30ab74ad710f227b410206ae94e8d1003c09b75216e39813dac0d9d", size = 630 }
|
| 676 |
|
| 677 |
[[package]]
|
| 678 |
name = "tokenizers"
|
|
|
|
| 704 |
{ url = "https://files.pythonhosted.org/packages/45/b6/36c1bb106bbe96012c9367df89ed01599cada036c0b96d38fbbdbeb75c9f/tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:43350270bfc16b06ad3f6f07eab21f089adb835544417afda0f83256a8bf8b75", size = 9945103 },
|
| 705 |
]
|
| 706 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
[[package]]
|
| 708 |
name = "torch"
|
| 709 |
+
version = "2.4.1"
|
| 710 |
source = { registry = "https://pypi.org/simple" }
|
| 711 |
dependencies = [
|
| 712 |
{ name = "filelock" },
|
|
|
|
| 729 |
{ name = "typing-extensions" },
|
| 730 |
]
|
| 731 |
wheels = [
|
| 732 |
+
{ url = "https://files.pythonhosted.org/packages/41/05/d540049b1832d1062510efc6829634b7fbef5394c757d8312414fb65a3cb/torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971", size = 797072810 },
|
| 733 |
+
{ url = "https://files.pythonhosted.org/packages/a0/12/2162df9c47386ae7cedbc938f9703fee4792d93504fab8608d541e71ece3/torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3", size = 89699259 },
|
| 734 |
+
{ url = "https://files.pythonhosted.org/packages/5d/4c/b2a59ff0e265f5ee154f0d81e948b1518b94f545357731e1a3245ee5d45b/torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada", size = 199433813 },
|
| 735 |
+
{ url = "https://files.pythonhosted.org/packages/dc/fb/1333ba666bbd53846638dd75a7a1d4eaf964aff1c482fc046e2311a1b499/torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd", size = 62139309 },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 736 |
]
|
| 737 |
|
| 738 |
[[package]]
|
|
|
|
| 770 |
|
| 771 |
[[package]]
|
| 772 |
name = "triton"
|
| 773 |
+
version = "3.0.0"
|
| 774 |
source = { registry = "https://pypi.org/simple" }
|
| 775 |
dependencies = [
|
| 776 |
{ name = "filelock" },
|
| 777 |
]
|
| 778 |
wheels = [
|
| 779 |
+
{ url = "https://files.pythonhosted.org/packages/45/27/14cc3101409b9b4b9241d2ba7deaa93535a217a211c86c4cc7151fb12181/triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a", size = 209376304 },
|
| 780 |
]
|
| 781 |
|
| 782 |
[[package]]
|
|
|
|
| 798 |
]
|
| 799 |
|
| 800 |
[[package]]
|
| 801 |
+
name = "wheel"
|
| 802 |
+
version = "0.44.0"
|
| 803 |
source = { registry = "https://pypi.org/simple" }
|
| 804 |
+
sdist = { url = "https://files.pythonhosted.org/packages/b7/a0/95e9e962c5fd9da11c1e28aa4c0d8210ab277b1ada951d2aee336b505813/wheel-0.44.0.tar.gz", hash = "sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49", size = 100733 }
|
|
|
|
|
|
|
|
|
|
|
|
|
| 805 |
wheels = [
|
| 806 |
+
{ url = "https://files.pythonhosted.org/packages/1b/d1/9babe2ccaecff775992753d8686970b1e2755d21c8a63be73aba7a4e7d77/wheel-0.44.0-py3-none-any.whl", hash = "sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f", size = 67059 },
|
|
|
|
| 807 |
]
|
| 808 |
|
| 809 |
[[package]]
|