IHaBiS commited on
Commit
fdd97e4
·
verified ·
1 Parent(s): 40f7209

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +14 -0
  2. .ipynb_checkpoints/Untitled-checkpoint.ipynb +6 -0
  3. .ipynb_checkpoints/sdxl_int4_aot_quantization-checkpoint.ipynb +771 -0
  4. .ipynb_checkpoints/sdxl_porting_example-checkpoint.ipynb +0 -0
  5. Untitled.ipynb +159 -0
  6. sdxl_ai_pack/device_targeting_configuration.xml +11 -0
  7. sdxl_ai_pack/sdxl_clip_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt +0 -0
  8. sdxl_ai_pack/sdxl_clip_mtk/src/main/assets/model#group_other/placeholder.txt +0 -0
  9. sdxl_ai_pack/sdxl_decoder_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt +0 -0
  10. sdxl_ai_pack/sdxl_decoder_mtk/src/main/assets/model#group_other/placeholder.txt +0 -0
  11. sdxl_ai_pack/sdxl_diffusion/src/main/assets/model#group_Qualcomm_SM8850/diffusion.tflite +3 -0
  12. sdxl_ai_pack/sdxl_diffusion_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt +0 -0
  13. sdxl_ai_pack/sdxl_diffusion_mtk/src/main/assets/model#group_other/placeholder.txt +0 -0
  14. sdxl_ai_pack/sdxl_open_clip_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt +0 -0
  15. sdxl_ai_pack/sdxl_open_clip_mtk/src/main/assets/model#group_other/placeholder.txt +0 -0
  16. sdxl_int4_aot_quantization.ipynb +1485 -0
  17. sdxl_porting_example.ipynb +0 -0
  18. sdxl_tflite/cfg_15.0.png +3 -0
  19. sdxl_tflite/cfg_2.0.png +3 -0
  20. sdxl_tflite/cfg_7.5.png +3 -0
  21. sdxl_tflite/cfg_comparison.png +3 -0
  22. sdxl_tflite/clip.tflite +3 -0
  23. sdxl_tflite/decoder.tflite +3 -0
  24. sdxl_tflite/generated_image.jpg +3 -0
  25. sdxl_tflite/playground_00.png +3 -0
  26. sdxl_tflite/playground_01.png +3 -0
  27. sdxl_tflite/playground_02.png +3 -0
  28. sdxl_tflite/playground_03.png +3 -0
  29. sdxl_tflite/playground_grid.png +3 -0
  30. sdxl_tflite/sampler_comparison.png +3 -0
  31. sdxl_tflite/sampler_k_euler.png +3 -0
  32. sdxl_tflite/sampler_k_euler_ancestral.png +3 -0
  33. sdxl_tflite/sampler_k_lms.png +3 -0
  34. sdxl_tflite/text_projection.npy +3 -0
  35. sdxl_tflite_aot_int4_blockwise_32/_compiled_models/diffusion_int4_Qualcomm_SM8850_apply_plugin.tflite +3 -0
  36. sdxl_tflite_fp16/_compiled_models/diffusion_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  37. sdxl_tflite_fp16/clip.tflite +3 -0
  38. sdxl_tflite_fp16/decoder.tflite +3 -0
  39. sdxl_tflite_fp16/diffusion.tflite +3 -0
  40. sdxl_tflite_fp16/open_clip.tflite +3 -0
  41. sdxl_tflite_fp16/text_projection.bin +3 -0
  42. sdxl_tflite_fp16/text_projection.npy +3 -0
  43. sdxl_tflite_quantized/_compiled_models/clip_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  44. sdxl_tflite_quantized/_compiled_models/decoder_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  45. sdxl_tflite_quantized/_compiled_models/diffusion_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  46. sdxl_tflite_quantized/_compiled_models/open_clip_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  47. sdxl_tflite_quantized/clip.tflite +3 -0
  48. sdxl_tflite_quantized/decoder.tflite +3 -0
  49. sdxl_tflite_quantized/diffusion.tflite +3 -0
  50. sdxl_tflite_quantized/open_clip.tflite +3 -0
.gitattributes CHANGED
@@ -33,3 +33,17 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ sdxl_tflite/generated_image.jpg filter=lfs diff=lfs merge=lfs -text
37
+ sdxl_tflite/playground_00.png filter=lfs diff=lfs merge=lfs -text
38
+ sdxl_tflite/playground_01.png filter=lfs diff=lfs merge=lfs -text
39
+ sdxl_tflite/playground_02.png filter=lfs diff=lfs merge=lfs -text
40
+ sdxl_tflite/playground_03.png filter=lfs diff=lfs merge=lfs -text
41
+ sdxl_tflite/playground_grid.png filter=lfs diff=lfs merge=lfs -text
42
+ sdxl_tflite/cfg_2.0.png filter=lfs diff=lfs merge=lfs -text
43
+ sdxl_tflite/cfg_7.5.png filter=lfs diff=lfs merge=lfs -text
44
+ sdxl_tflite/cfg_15.0.png filter=lfs diff=lfs merge=lfs -text
45
+ sdxl_tflite/cfg_comparison.png filter=lfs diff=lfs merge=lfs -text
46
+ sdxl_tflite/sampler_k_euler.png filter=lfs diff=lfs merge=lfs -text
47
+ sdxl_tflite/sampler_k_euler_ancestral.png filter=lfs diff=lfs merge=lfs -text
48
+ sdxl_tflite/sampler_k_lms.png filter=lfs diff=lfs merge=lfs -text
49
+ sdxl_tflite/sampler_comparison.png filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
.ipynb_checkpoints/sdxl_int4_aot_quantization-checkpoint.ipynb ADDED
@@ -0,0 +1,771 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "30781904-bd35-473d-9016-1f33ed9f036f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "\n",
14
+ "==================================================\n",
15
+ "[clip] Quantizing...\n"
16
+ ]
17
+ },
18
+ {
19
+ "name": "stderr",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
23
+ " ret = np.divide(tensor_data, scales)\n",
24
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
25
+ " return tensor.astype(qtype)\n"
26
+ ]
27
+ },
28
+ {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "[clip] Size: 469.6 MB -> 66.8 MB (14%)\n",
33
+ "[clip] AOT compiling for SM8850...\n"
34
+ ]
35
+ },
36
+ {
37
+ "data": {
38
+ "application/vnd.jupyter.widget-view+json": {
39
+ "model_id": "d5ab30865c774458b49fb8b7586e314f",
40
+ "version_major": 2,
41
+ "version_minor": 0
42
+ },
43
+ "text/plain": [
44
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
45
+ ]
46
+ },
47
+ "metadata": {},
48
+ "output_type": "display_data"
49
+ },
50
+ {
51
+ "name": "stdout",
52
+ "output_type": "stream",
53
+ "text": [
54
+ "[clip] AOT: Success=1, Failed=0\n",
55
+ "\n",
56
+ "==================================================\n",
57
+ "[open_clip] Quantizing...\n"
58
+ ]
59
+ },
60
+ {
61
+ "name": "stderr",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: divide by zero encountered in divide\n",
65
+ " ret = np.divide(tensor_data, scales)\n"
66
+ ]
67
+ },
68
+ {
69
+ "name": "stdout",
70
+ "output_type": "stream",
71
+ "text": [
72
+ "[open_clip] Size: 2643.7 MB -> 374.0 MB (14%)\n",
73
+ "[open_clip] AOT compiling for SM8850...\n"
74
+ ]
75
+ },
76
+ {
77
+ "data": {
78
+ "application/vnd.jupyter.widget-view+json": {
79
+ "model_id": "0f7cde62157f4eb48338fedab0d8747a",
80
+ "version_major": 2,
81
+ "version_minor": 0
82
+ },
83
+ "text/plain": [
84
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
85
+ ]
86
+ },
87
+ "metadata": {},
88
+ "output_type": "display_data"
89
+ },
90
+ {
91
+ "name": "stdout",
92
+ "output_type": "stream",
93
+ "text": [
94
+ "[open_clip] AOT: Success=1, Failed=0\n",
95
+ "\n",
96
+ "==================================================\n",
97
+ "[diffusion] Quantizing...\n",
98
+ "[diffusion] Size: 9799.6 MB -> 2480.8 MB (25%)\n",
99
+ "[diffusion] AOT compiling for SM8850...\n"
100
+ ]
101
+ },
102
+ {
103
+ "data": {
104
+ "application/vnd.jupyter.widget-view+json": {
105
+ "model_id": "28a7fba90a074601b1834938352c8193",
106
+ "version_major": 2,
107
+ "version_minor": 0
108
+ },
109
+ "text/plain": [
110
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
111
+ ]
112
+ },
113
+ "metadata": {},
114
+ "output_type": "display_data"
115
+ }
116
+ ],
117
+ "source": [
118
+ " import os\n",
119
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
120
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
121
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
122
+ "\n",
123
+ " SRC_DIR = \"/workspace/sdxl_tflite\"\n",
124
+ " OUT_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
125
+ " os.makedirs(OUT_DIR, exist_ok=True)\n",
126
+ "\n",
127
+ " COMPONENTS = [\"clip\", \"open_clip\", \"diffusion\", \"decoder\"]\n",
128
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
129
+ "\n",
130
+ " int4_config = aie_quantizer.qtyping.OpQuantizationConfig(\n",
131
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
132
+ " num_bits=4,\n",
133
+ " symmetric=True,\n",
134
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
135
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
136
+ " ),\n",
137
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
138
+ " explicit_dequantize=False,\n",
139
+ " )\n",
140
+ "\n",
141
+ " results = {}\n",
142
+ "\n",
143
+ " for name in COMPONENTS:\n",
144
+ " src = f\"{SRC_DIR}/{name}.tflite\"\n",
145
+ " int4_path = f\"{OUT_DIR}/{name}_int4.tflite\"\n",
146
+ " print(f\"\\n{'='*50}\")\n",
147
+ " print(f\"[{name}] Quantizing...\")\n",
148
+ "\n",
149
+ " qt = aie_quantizer.Quantizer(src)\n",
150
+ " qt.update_quantization_recipe(\n",
151
+ " regex=\".*\",\n",
152
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
153
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
154
+ " op_config=int4_config,\n",
155
+ " )\n",
156
+ " quant_result = qt.quantize()\n",
157
+ " with open(int4_path, \"wb\") as f:\n",
158
+ " f.write(quant_result.quantized_model)\n",
159
+ "\n",
160
+ " orig_mb = os.path.getsize(src) / 1024 / 1024\n",
161
+ " quant_mb = os.path.getsize(int4_path) / 1024 / 1024\n",
162
+ " print(f\"[{name}] Size: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
163
+ "\n",
164
+ " print(f\"[{name}] AOT compiling for SM8850...\")\n",
165
+ " aot_result = aot_lib.aot_compile(\n",
166
+ " int4_path,\n",
167
+ " output_dir=OUT_DIR,\n",
168
+ " target=[sm8850_target],\n",
169
+ " keep_going=True,\n",
170
+ " )\n",
171
+ "\n",
172
+ " success = len(aot_result.models_with_backend)\n",
173
+ " failed = len(aot_result.failed_backends)\n",
174
+ " results[name] = {\"success\": success, \"failed\": failed}\n",
175
+ " print(f\"[{name}] AOT: Success={success}, Failed={failed}\")\n",
176
+ " for backend, error in aot_result.failed_backends:\n",
177
+ " print(f\"[{name}] Error: {error[:300]}\")\n",
178
+ "\n",
179
+ " print(f\"\\n{'='*50}\")\n",
180
+ " print(\"Summary:\")\n",
181
+ " for name, r in results.items():\n",
182
+ " status = \"OK\" if r[\"success\"] > 0 else \"FAIL\"\n",
183
+ " print(f\" {name}: {status}\")\n",
184
+ " print(f\"\\nOutput: {OUT_DIR}\")\n",
185
+ " print(f\"Files: {os.listdir(OUT_DIR)}\")"
186
+ ]
187
+ },
188
+ {
189
+ "cell_type": "markdown",
190
+ "id": "3936e5c5-ceeb-4cc3-819c-702a38a89bf0",
191
+ "metadata": {},
192
+ "source": [
193
+ "# AOT Convertion Code\n",
194
+ "\n",
195
+ "위는 완성된 코드, 아래는 테스트용 잡다한거"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": 2,
201
+ "id": "a87c3a83-7811-465e-9079-bced67cbb82e",
202
+ "metadata": {},
203
+ "outputs": [
204
+ {
205
+ "name": "stdout",
206
+ "output_type": "stream",
207
+ "text": [
208
+ "['Quantizer', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'algorithm_manager', 'algorithm_manager_api', 'algorithms', 'calibrator', 'default_policy', 'model_modifier', 'model_validator', 'params_generator', 'qtyping', 'quantizer', 'recipe', 'recipe_manager', 'transformation_instruction_generator', 'transformation_performer', 'transformations', 'utils']\n",
209
+ "['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__']\n"
210
+ ]
211
+ }
212
+ ],
213
+ "source": [
214
+ " # ai_edge_quantizer에 AieQuantizerT 구현체가 있는지 확인\n",
215
+ " import ai_edge_quantizer\n",
216
+ " print(dir(ai_edge_quantizer))\n",
217
+ "\n",
218
+ " # 혹시 ai_edge_litert.aot 쪽에 있는지도 확인\n",
219
+ " import ai_edge_litert.aot\n",
220
+ " print(dir(ai_edge_litert.aot))\n",
221
+ "\n",
222
+ " # components에서 import 가능한 quantizer 구현체 확인\n",
223
+ " from ai_edge_litert.aot.core import components\n",
224
+ " import inspect\n",
225
+ " for name, obj in inspect.getmembers(components):\n",
226
+ " if inspect.isclass(obj) and issubclass(obj, components.AieQuantizerT) and obj != components.AieQuantizerT:\n",
227
+ " print(f\"Found: {name}\")"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": 4,
233
+ "id": "b7f7c046-e555-417d-b877-60cf9845d2dc",
234
+ "metadata": {},
235
+ "outputs": [
236
+ {
237
+ "name": "stderr",
238
+ "output_type": "stream",
239
+ "text": [
240
+ "/usr/local/lib/python3.12/dist-packages/torch/distributed/distributed_c10d.py:351: UserWarning: Device capability of jax unspecified, assuming `cpu` and `cuda`. Please specify it via the `devices` argument of `register_backend`.\n",
241
+ " warnings.warn(\n",
242
+ "An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n",
243
+ "Skipping import of cpp extensions due to incompatible torch version 2.8.0+cu128 for torchao version 0.16.0 Please see https://github.com/pytorch/ao/issues/2919 for more info\n"
244
+ ]
245
+ },
246
+ {
247
+ "name": "stdout",
248
+ "output_type": "stream",
249
+ "text": [
250
+ "INT4 BLOCKWISE_32 verify passed: QuantConfig(pt2e_quantizer=None, generative_recipe=GenerativeQuantRecipe(\n",
251
+ " Default: (a:FP32, w:INT4, DYNAMIC_RANGE, MIN_MAX, BLOCKWISE_32, )\n",
252
+ " Embedding: None\n",
253
+ " Attention: None\n",
254
+ " Feedforward: None\n",
255
+ "), _quantizer_mode=<_QuantizerMode.AI_EDGE_QUANTIZER: 4>)\n"
256
+ ]
257
+ }
258
+ ],
259
+ "source": [
260
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
261
+ "\n",
262
+ " # INT4 BLOCKWISE_32 (DYNAMIC_RANGE 모드)\n",
263
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
264
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
265
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
266
+ " )\n",
267
+ " print(\"INT4 BLOCKWISE_32 verify passed:\", quant_config)\n"
268
+ ]
269
+ },
270
+ {
271
+ "cell_type": "code",
272
+ "execution_count": 5,
273
+ "id": "e8c3a18d-09fa-4c3f-8e92-83937f9a97de",
274
+ "metadata": {},
275
+ "outputs": [
276
+ {
277
+ "name": "stdout",
278
+ "output_type": "stream",
279
+ "text": [
280
+ "QuantConfig(pt2e_quantizer=None, generative_recipe=GenerativeQuantRecipe(\n",
281
+ " Default: (a:FP32, w:INT4, DYNAMIC_RANGE, MIN_MAX, BLOCKWISE_32, )\n",
282
+ " Embedding: None\n",
283
+ " Attention: None\n",
284
+ " Feedforward: None\n",
285
+ "), _quantizer_mode=<_QuantizerMode.AI_EDGE_QUANTIZER: 4>)\n"
286
+ ]
287
+ }
288
+ ],
289
+ "source": [
290
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
291
+ "\n",
292
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
293
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
294
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
295
+ " )\n",
296
+ " print(quant_config)"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 6,
302
+ "id": "4619e935-a84e-4c01-ac96-ede6988f7e52",
303
+ "metadata": {},
304
+ "outputs": [
305
+ {
306
+ "ename": "NameError",
307
+ "evalue": "name 'clip_loaded' is not defined",
308
+ "output_type": "error",
309
+ "traceback": [
310
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
311
+ "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
312
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 19\u001b[39m\n\u001b[32m 14\u001b[39m os.makedirs(TEST_DIR, exist_ok=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m 16\u001b[39m prompt_tokens = torch.full((\u001b[32m1\u001b[39m, \u001b[32m77\u001b[39m), \u001b[32m0\u001b[39m, dtype=torch.int) \u001b[38;5;66;03m# N_TOKENS=77\u001b[39;00m\n\u001b[32m 18\u001b[39m litert_torch.signature(\n\u001b[32m---> \u001b[39m\u001b[32m19\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mencode\u001b[39m\u001b[33m\"\u001b[39m, \u001b[43mclip_loaded\u001b[49m, (prompt_tokens,)\n\u001b[32m 20\u001b[39m ).convert(quant_config=quant_config).export(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mTEST_DIR\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/clip_int4.tflite\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 21\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mINT4 clip conversion done\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 23\u001b[39m \u001b[38;5;66;03m# AOT 컴파일 테스트\u001b[39;00m\n",
313
+ "\u001b[31mNameError\u001b[39m: name 'clip_loaded' is not defined"
314
+ ]
315
+ }
316
+ ],
317
+ "source": [
318
+ " import os\n",
319
+ " import torch\n",
320
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
321
+ " import litert_torch\n",
322
+ "\n",
323
+ " # INT4 BLOCKWISE_32 config\n",
324
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
325
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
326
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
327
+ " )\n",
328
+ "\n",
329
+ " # clip 모델만 변환 (가장 가벼움)\n",
330
+ " TEST_DIR = \"/tmp/sdxl_int4_test\"\n",
331
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
332
+ "\n",
333
+ " prompt_tokens = torch.full((1, 77), 0, dtype=torch.int) # N_TOKENS=77\n",
334
+ "\n",
335
+ " litert_torch.signature(\n",
336
+ " \"encode\", clip_loaded, (prompt_tokens,)\n",
337
+ " ).convert(quant_config=quant_config).export(f\"{TEST_DIR}/clip_int4.tflite\")\n",
338
+ " print(\"INT4 clip conversion done\")\n",
339
+ "\n",
340
+ " # AOT 컴파일 테스트\n",
341
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
342
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
343
+ "\n",
344
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
345
+ " result = aot_lib.aot_compile(\n",
346
+ " f\"{TEST_DIR}/clip_int4.tflite\",\n",
347
+ " target=[sm8850_target],\n",
348
+ " keep_going=True,\n",
349
+ " )\n",
350
+ "\n",
351
+ " print(f\"Success: {len(result.models_with_backend)}\")\n",
352
+ " print(f\"Failed: {len(result.failed_backends)}\")\n",
353
+ " for backend, error in result.failed_backends:\n",
354
+ " print(f\" {backend.target}: {error[:200]}\")"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "code",
359
+ "execution_count": 7,
360
+ "id": "429c16cb-c8bf-4d66-ae08-e7fb0b35d0b3",
361
+ "metadata": {},
362
+ "outputs": [
363
+ {
364
+ "name": "stdout",
365
+ "output_type": "stream",
366
+ "text": [
367
+ "/tmp/sdxl_tflite: ['clip.tflite', 'open_clip.tflite', 'diffusion.tflite', 'decoder.tflite', 'text_projection.npy', 'generated_image.jpg', 'playground_00.png', 'playground_01.png', 'playground_02.png', 'playground_03.png', 'playground_grid.png', 'cfg_2.0.png', 'cfg_7.5.png', 'cfg_15.0.png', 'cfg_comparison.png', 'sampler_k_euler.png', 'sampler_k_euler_ancestral.png', 'sampler_k_lms.png', 'sampler_comparison.png']\n",
368
+ "/workspace/sdxl_tflite: ['open_clip.tflite', 'diffusion.tflite', 'decoder.tflite', 'text_projection.npy', 'generated_image.jpg', 'playground_00.png', 'playground_01.png', 'playground_02.png', 'playground_03.png', 'playground_grid.png', 'cfg_2.0.png', 'cfg_7.5.png', 'cfg_15.0.png', 'cfg_comparison.png', 'sampler_k_euler.png', 'sampler_k_euler_ancestral.png', 'sampler_k_lms.png', 'sampler_comparison.png', 'clip.tflite', '.ipynb_checkpoints']\n"
369
+ ]
370
+ }
371
+ ],
372
+ "source": [
373
+ " import os\n",
374
+ " # 이전 변환 결과가 남아있는지 확인\n",
375
+ " for d in [\"/tmp/sdxl_tflite\", \"/tmp/sdxl_tflite_quantized\", \"/workspace/sdxl_tflite\"]:\n",
376
+ " if os.path.exists(d):\n",
377
+ " files = os.listdir(d)\n",
378
+ " print(f\"{d}: {files}\")"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": 9,
384
+ "id": "70087fe5-d3e8-4bf2-ba1c-92c128d88a0c",
385
+ "metadata": {},
386
+ "outputs": [
387
+ {
388
+ "name": "stderr",
389
+ "output_type": "stream",
390
+ "text": [
391
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
392
+ " ret = np.divide(tensor_data, scales)\n",
393
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
394
+ " return tensor.astype(qtype)\n"
395
+ ]
396
+ },
397
+ {
398
+ "name": "stdout",
399
+ "output_type": "stream",
400
+ "text": [
401
+ "INT4 blockwise clip: 66.8 MB\n"
402
+ ]
403
+ },
404
+ {
405
+ "data": {
406
+ "application/vnd.jupyter.widget-view+json": {
407
+ "model_id": "27fdf6c884d9490f8909bf33a088a1c4",
408
+ "version_major": 2,
409
+ "version_minor": 0
410
+ },
411
+ "text/plain": [
412
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
413
+ ]
414
+ },
415
+ "metadata": {},
416
+ "output_type": "display_data"
417
+ },
418
+ {
419
+ "name": "stdout",
420
+ "output_type": "stream",
421
+ "text": [
422
+ "Success: 1\n",
423
+ "Failed: 0\n"
424
+ ]
425
+ }
426
+ ],
427
+ "source": [
428
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
429
+ " from ai_edge_quantizer import recipe as aie_recipe\n",
430
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
431
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
432
+ " import os\n",
433
+ "\n",
434
+ " # 1. FP32 clip을 INT4 blockwise로 양자화\n",
435
+ " SRC = \"/tmp/sdxl_tflite/clip.tflite\"\n",
436
+ " TEST_DIR = \"/tmp/sdxl_int4_test\"\n",
437
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
438
+ "\n",
439
+ " qt.update_quantization_recipe(\n",
440
+ " regex=\".*\",\n",
441
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
442
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
443
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
444
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
445
+ " num_bits=4,\n",
446
+ " symmetric=True,\n",
447
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
448
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
449
+ " ),\n",
450
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
451
+ " explicit_dequantize=False,\n",
452
+ " ),\n",
453
+ " )\n",
454
+ " result = qt.quantize()\n",
455
+ " int4_path = f\"{TEST_DIR}/clip_int4.tflite\"\n",
456
+ " with open(int4_path, \"wb\") as f:\n",
457
+ " f.write(result.quantized_model)\n",
458
+ " print(f\"INT4 blockwise clip: {os.path.getsize(int4_path) / 1024 / 1024:.1f} MB\")\n",
459
+ "\n",
460
+ " # AOT 컴파일 테스트\n",
461
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
462
+ " aot_result = aot_lib.aot_compile(\n",
463
+ " int4_path,\n",
464
+ " target=[sm8850_target],\n",
465
+ " keep_going=True,\n",
466
+ " )\n",
467
+ " print(f\"Success: {len(aot_result.models_with_backend)}\")\n",
468
+ " print(f\"Failed: {len(aot_result.failed_backends)}\")\n",
469
+ " for backend, error in aot_result.failed_backends:\n",
470
+ " print(f\" Error: {error[:300]}\")"
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "execution_count": null,
476
+ "id": "c9974fa3-8be4-4110-9194-892d05219ec7",
477
+ "metadata": {},
478
+ "outputs": [],
479
+ "source": [
480
+ " COMPONENTS = [\"open_clip\", \"decoder\", \"diffusion\"]\n",
481
+ " SRC_DIR = \"/tmp/sdxl_tflite\"\n",
482
+ "\n",
483
+ " for name in COMPONENTS:\n",
484
+ " src = f\"{SRC_DIR}/{name}.tflite\"\n",
485
+ " print(f\"\\n=== {name} ===\")\n",
486
+ "\n",
487
+ " qt = aie_quantizer.Quantizer(src)\n",
488
+ " qt.update_quantization_recipe(\n",
489
+ " regex=\".*\",\n",
490
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
491
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
492
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
493
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
494
+ " num_bits=4,\n",
495
+ " symmetric=True,\n",
496
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
497
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
498
+ " ),\n",
499
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
500
+ " explicit_dequantize=False,\n",
501
+ " ),\n",
502
+ " )\n",
503
+ " result = qt.quantize()\n",
504
+ " out_path = f\"{TEST_DIR}/{name}_int4.tflite\"\n",
505
+ " with open(out_path, \"wb\") as f:\n",
506
+ " f.write(result.quantized_model)\n",
507
+ "\n",
508
+ " orig_mb = os.path.getsize(src) / 1024 / 1024\n",
509
+ " quant_mb = os.path.getsize(out_path) / 1024 / 1024\n",
510
+ " print(f\" Size: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
511
+ "\n",
512
+ " aot_result = aot_lib.aot_compile(\n",
513
+ " out_path,\n",
514
+ " target=[sm8850_target],\n",
515
+ " keep_going=True,\n",
516
+ " )\n",
517
+ " print(f\" AOT: Success={len(aot_result.models_with_backend)}, Failed={len(aot_result.failed_backends)}\")\n",
518
+ " for backend, error in aot_result.failed_backends:\n",
519
+ " print(f\" Error: {error[:300]}\")"
520
+ ]
521
+ },
522
+ {
523
+ "cell_type": "code",
524
+ "execution_count": 10,
525
+ "id": "69c9286b-c246-438b-ac94-d5d7774de151",
526
+ "metadata": {},
527
+ "outputs": [
528
+ {
529
+ "name": "stdout",
530
+ "output_type": "stream",
531
+ "text": [
532
+ "Quantize: 469.6 MB -> 469.6 MB (100%)\n"
533
+ ]
534
+ },
535
+ {
536
+ "data": {
537
+ "application/vnd.jupyter.widget-view+json": {
538
+ "model_id": "7f78061d12674ad286882e3856f4e638",
539
+ "version_major": 2,
540
+ "version_minor": 0
541
+ },
542
+ "text/plain": [
543
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
544
+ ]
545
+ },
546
+ "metadata": {},
547
+ "output_type": "display_data"
548
+ },
549
+ {
550
+ "name": "stdout",
551
+ "output_type": "stream",
552
+ "text": [
553
+ "AOT: Success=1, Failed=0\n"
554
+ ]
555
+ }
556
+ ],
557
+ "source": [
558
+ " import os\n",
559
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
560
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
561
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
562
+ "\n",
563
+ " SRC = \"/workspace/sdxl_tflite/clip.tflite\"\n",
564
+ " TEST_DIR = \"/tmp/sdxl_int8_blockwise_test\"\n",
565
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
566
+ "\n",
567
+ " # INT8 BLOCKWISE_32\n",
568
+ " qt = aie_quantizer.Quantizer(SRC)\n",
569
+ " qt.update_quantization_recipe(\n",
570
+ " regex=\".*\",\n",
571
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
572
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
573
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
574
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
575
+ " num_bits=8,\n",
576
+ " symmetric=True,\n",
577
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
578
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
579
+ " ),\n",
580
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
581
+ " explicit_dequantize=False,\n",
582
+ " ),\n",
583
+ " )\n",
584
+ " result = qt.quantize()\n",
585
+ " out_path = f\"{TEST_DIR}/clip_int8_bw32.tflite\"\n",
586
+ " with open(out_path, \"wb\") as f:\n",
587
+ " f.write(result.quantized_model)\n",
588
+ "\n",
589
+ " orig_mb = os.path.getsize(SRC) / 1024 / 1024\n",
590
+ " quant_mb = os.path.getsize(out_path) / 1024 / 1024\n",
591
+ " print(f\"Quantize: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
592
+ "\n",
593
+ " # AOT\n",
594
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
595
+ " aot_result = aot_lib.aot_compile(\n",
596
+ " out_path,\n",
597
+ " target=[sm8850_target],\n",
598
+ " keep_going=True,\n",
599
+ " )\n",
600
+ " print(f\"AOT: Success={len(aot_result.models_with_backend)}, Failed={len(aot_result.failed_backends)}\")\n",
601
+ " for backend, error in aot_result.failed_backends:\n",
602
+ " print(f\"Error: {error[:300]}\")"
603
+ ]
604
+ },
605
+ {
606
+ "cell_type": "code",
607
+ "execution_count": null,
608
+ "id": "34754c5d-7180-4f3a-9171-de27988a28d9",
609
+ "metadata": {},
610
+ "outputs": [
611
+ {
612
+ "name": "stderr",
613
+ "output_type": "stream",
614
+ "text": [
615
+ "/usr/local/lib/python3.12/dist-packages/tensorflow/lite/python/interpreter.py:457: UserWarning: Warning: tf.lite.Interpreter is deprecated and is scheduled for deletion in\n",
616
+ " TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.\n",
617
+ " See the [migration guide](https://ai.google.dev/edge/litert/migration)\n",
618
+ " for details.\n",
619
+ " \n",
620
+ " warnings.warn(_INTERPRETER_DELETION_WARNING)\n"
621
+ ]
622
+ }
623
+ ],
624
+ "source": [
625
+ " # 양자화 전후 모델 내부 확인\n",
626
+ " import tensorflow as tf\n",
627
+ "\n",
628
+ " orig = tf.lite.Interpreter(model_path=\"/workspace/sdxl_tflite/clip.tflite\")\n",
629
+ " orig.allocate_tensors()\n",
630
+ "\n",
631
+ " quant = tf.lite.Interpreter(model_path=\"/tmp/sdxl_int8_blockwise_test/clip_int8_bw32.tflite\")\n",
632
+ " quant.allocate_tensors()\n",
633
+ "\n",
634
+ " # 가중치 텐서 dtype 비교\n",
635
+ " print(\"=== Original ===\")\n",
636
+ " for t in orig.get_tensor_details()[:5]:\n",
637
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")\n",
638
+ "\n",
639
+ " print(\"\\n=== INT8 blockwise ===\")\n",
640
+ " for t in quant.get_tensor_details()[:5]:\n",
641
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")\n",
642
+ "\n",
643
+ " # INT4 것도 비교\n",
644
+ " int4 = tf.lite.Interpreter(model_path=\"/tmp/sdxl_int4_test/clip_int4.tflite\")\n",
645
+ " int4.allocate_tensors()\n",
646
+ " print(\"\\n=== INT4 blockwise ===\")\n",
647
+ " for t in int4.get_tensor_details()[:5]:\n",
648
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")"
649
+ ]
650
+ },
651
+ {
652
+ "cell_type": "code",
653
+ "execution_count": 2,
654
+ "id": "98db8f3d-1099-468a-87b2-e7f60431b948",
655
+ "metadata": {},
656
+ "outputs": [
657
+ {
658
+ "name": "stdout",
659
+ "output_type": "stream",
660
+ "text": [
661
+ "Original: 469.6 MB\n",
662
+ "Quantized bytes: 469.6 MB\n"
663
+ ]
664
+ },
665
+ {
666
+ "name": "stderr",
667
+ "output_type": "stream",
668
+ "text": [
669
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
670
+ " ret = np.divide(tensor_data, scales)\n",
671
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
672
+ " return tensor.astype(qtype)\n"
673
+ ]
674
+ },
675
+ {
676
+ "name": "stdout",
677
+ "output_type": "stream",
678
+ "text": [
679
+ "INT4 quantized bytes: 66.8 MB\n"
680
+ ]
681
+ }
682
+ ],
683
+ "source": [
684
+ " import os\n",
685
+ "\n",
686
+ " # 양자화만 다시 해서 확인 (AOT 안 거침)\n",
687
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
688
+ "\n",
689
+ " SRC = \"/workspace/sdxl_tflite/clip.tflite\"\n",
690
+ "\n",
691
+ " # INT8 blockwise 양자화만\n",
692
+ " qt = aie_quantizer.Quantizer(SRC)\n",
693
+ " qt.update_quantization_recipe(\n",
694
+ " regex=\".*\",\n",
695
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
696
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
697
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
698
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
699
+ " num_bits=8,\n",
700
+ " symmetric=True,\n",
701
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
702
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
703
+ " ),\n",
704
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
705
+ " explicit_dequantize=False,\n",
706
+ " ),\n",
707
+ " )\n",
708
+ " result = qt.quantize()\n",
709
+ "\n",
710
+ " # 양자화 결과 통계 확인\n",
711
+ " print(f\"Original: {os.path.getsize(SRC) / 1024 / 1024:.1f} MB\")\n",
712
+ " print(f\"Quantized bytes: {len(result.quantized_model) / 1024 / 1024:.1f} MB\")\n",
713
+ "\n",
714
+ " # 양자화 로그 확인\n",
715
+ " if hasattr(result, 'log'):\n",
716
+ " print(f\"Log: {result.log}\")\n",
717
+ " if hasattr(result, 'skipped_ops'):\n",
718
+ " print(f\"Skipped: {result.skipped_ops}\")\n",
719
+ "\n",
720
+ " # 비교: INT4도 같은 방식으로 크기 확인\n",
721
+ " qt4 = aie_quantizer.Quantizer(SRC)\n",
722
+ " qt4.update_quantization_recipe(\n",
723
+ " regex=\".*\",\n",
724
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
725
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
726
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
727
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
728
+ " num_bits=4,\n",
729
+ " symmetric=True,\n",
730
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
731
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
732
+ " ),\n",
733
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
734
+ " explicit_dequantize=False,\n",
735
+ " ),\n",
736
+ " )\n",
737
+ " result4 = qt4.quantize()\n",
738
+ " print(f\"INT4 quantized bytes: {len(result4.quantized_model) / 1024 / 1024:.1f} MB\")\n"
739
+ ]
740
+ },
741
+ {
742
+ "cell_type": "code",
743
+ "execution_count": null,
744
+ "id": "98d994de-58ff-4741-9521-f8d0823ff089",
745
+ "metadata": {},
746
+ "outputs": [],
747
+ "source": []
748
+ }
749
+ ],
750
+ "metadata": {
751
+ "kernelspec": {
752
+ "display_name": "Python 3 (ipykernel)",
753
+ "language": "python",
754
+ "name": "python3"
755
+ },
756
+ "language_info": {
757
+ "codemirror_mode": {
758
+ "name": "ipython",
759
+ "version": 3
760
+ },
761
+ "file_extension": ".py",
762
+ "mimetype": "text/x-python",
763
+ "name": "python",
764
+ "nbconvert_exporter": "python",
765
+ "pygments_lexer": "ipython3",
766
+ "version": "3.12.3"
767
+ }
768
+ },
769
+ "nbformat": 4,
770
+ "nbformat_minor": 5
771
+ }
.ipynb_checkpoints/sdxl_porting_example-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Untitled.ipynb ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "950d36bf-2792-434e-920d-31954ec49878",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "text_projection.npy: 6.3 MB\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ " import os\n",
19
+ " npy_path = \"/workspace/sdxl_tflite/text_projection.npy\"\n",
20
+ " size_mb = os.path.getsize(npy_path) / 1024 / 1024\n",
21
+ " print(f\"text_projection.npy: {size_mb:.1f} MB\")"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "id": "0db2e3ce-274a-4af3-bdd3-87886afd603a",
28
+ "metadata": {},
29
+ "outputs": [
30
+ {
31
+ "name": "stdout",
32
+ "output_type": "stream",
33
+ "text": [
34
+ "Done: (1280, 1280), float32\n"
35
+ ]
36
+ }
37
+ ],
38
+ "source": [
39
+ " import numpy as np\n",
40
+ " tp = np.load(\"/workspace/sdxl_tflite_fp16/text_projection.npy\")\n",
41
+ " tp.astype(np.float32).tofile(\"/workspace/sdxl_tflite_fp16/text_projection.bin\")\n",
42
+ " print(f\"Done: {tp.shape}, {tp.dtype}\")"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 4,
48
+ "id": "eb0f13c8-4ac0-4e8e-b717-5e4723b49e93",
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "name": "stdout",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "/workspace/sdxl_tflite/clip.tflite: 470 MB\n",
56
+ "/workspace/sdxl_tflite/open_clip.tflite: 2644 MB\n",
57
+ "/workspace/sdxl_tflite/diffusion.tflite: 9800 MB\n",
58
+ "/workspace/sdxl_tflite/decoder.tflite: 189 MB\n",
59
+ "\n",
60
+ "/workspace/sdxl_tflite_fp16/clip.tflite: 235 MB\n",
61
+ "/workspace/sdxl_tflite_fp16/open_clip.tflite: 1323 MB\n",
62
+ "/workspace/sdxl_tflite_fp16/diffusion.tflite: 4906 MB\n",
63
+ "/workspace/sdxl_tflite_fp16/decoder.tflite: 95 MB\n",
64
+ "\n"
65
+ ]
66
+ }
67
+ ],
68
+ "source": [
69
+ " import os\n",
70
+ " for d in [\"/workspace/sdxl_tflite\", \"/workspace/sdxl_tflite_fp16\"]:\n",
71
+ " if os.path.exists(d):\n",
72
+ " for f in [\"clip.tflite\", \"open_clip.tflite\", \"diffusion.tflite\", \"decoder.tflite\"]:\n",
73
+ " path = f\"{d}/{f}\"\n",
74
+ " if os.path.exists(path):\n",
75
+ " size = os.path.getsize(path) / 1024 / 1024\n",
76
+ " print(f\"{d}/{f}: {size:.0f} MB\")\n",
77
+ " print()"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": 5,
83
+ "id": "4409a65f-e9b8-417a-98a3-6683c8e3d6f2",
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "name": "stdout",
88
+ "output_type": "stream",
89
+ "text": [
90
+ "FP16 clip: 235 MB\n"
91
+ ]
92
+ }
93
+ ],
94
+ "source": [
95
+ " # 원격 서버에서 실행\n",
96
+ " import os\n",
97
+ " size = os.path.getsize(\"/workspace/sdxl_tflite_fp16/clip.tflite\")\n",
98
+ " print(f\"FP16 clip: {size / 1024 / 1024:.0f} MB\")"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": 6,
104
+ "id": "d1b11b7e-3acd-4f78-a091-36f628198fc3",
105
+ "metadata": {},
106
+ "outputs": [
107
+ {
108
+ "name": "stdout",
109
+ "output_type": "stream",
110
+ "text": [
111
+ "/workspace/sdxl_tflite_quantized/clip.tflite: 120 MB\n",
112
+ "/workspace/sdxl_tflite_quantized/open_clip.tflite: 668 MB\n",
113
+ "/workspace/sdxl_tflite_quantized/diffusion.tflite: 2476 MB\n",
114
+ "/workspace/sdxl_tflite_quantized/decoder.tflite: 48 MB\n"
115
+ ]
116
+ }
117
+ ],
118
+ "source": [
119
+ " import os\n",
120
+ " # INT8 모델 경로 확인\n",
121
+ " for d in [\"/workspace/sdxl_tflite_quantized\", \"/tmp/sdxl_tflite_quantized\"]:\n",
122
+ " if os.path.exists(d):\n",
123
+ " for f in os.listdir(d):\n",
124
+ " if f.endswith('.tflite'):\n",
125
+ " size = os.path.getsize(f\"{d}/{f}\") / 1024 / 1024\n",
126
+ " print(f\"{d}/{f}: {size:.0f} MB\")"
127
+ ]
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "execution_count": null,
132
+ "id": "3b16e345-80ad-495f-8151-8a1664b5446f",
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": []
136
+ }
137
+ ],
138
+ "metadata": {
139
+ "kernelspec": {
140
+ "display_name": "Python 3 (ipykernel)",
141
+ "language": "python",
142
+ "name": "python3"
143
+ },
144
+ "language_info": {
145
+ "codemirror_mode": {
146
+ "name": "ipython",
147
+ "version": 3
148
+ },
149
+ "file_extension": ".py",
150
+ "mimetype": "text/x-python",
151
+ "name": "python",
152
+ "nbconvert_exporter": "python",
153
+ "pygments_lexer": "ipython3",
154
+ "version": "3.12.3"
155
+ }
156
+ },
157
+ "nbformat": 4,
158
+ "nbformat_minor": 5
159
+ }
sdxl_ai_pack/device_targeting_configuration.xml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <config:device-targeting-config
2
+ xmlns:config="http://schemas.android.com/apk/config">
3
+ <config:device-group name="Qualcomm_SM8850">
4
+ <config:device-selector>
5
+ <config:system-on-chip manufacturer="Qualcomm" model="SM8850"/>
6
+ </config:device-selector>
7
+ <config:device-selector>
8
+ <config:system-on-chip manufacturer="QTI" model="SM8850"/>
9
+ </config:device-selector>
10
+ </config:device-group>
11
+ </config:device-targeting-config>
sdxl_ai_pack/sdxl_clip_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_clip_mtk/src/main/assets/model#group_other/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_decoder_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_decoder_mtk/src/main/assets/model#group_other/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_diffusion/src/main/assets/model#group_Qualcomm_SM8850/diffusion.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c467a24fc1ae7aa397e74f0480ba981bdebe4b94b5808cb6a220bc350816acf
3
+ size 2587049280
sdxl_ai_pack/sdxl_diffusion_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_diffusion_mtk/src/main/assets/model#group_other/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_open_clip_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_open_clip_mtk/src/main/assets/model#group_other/placeholder.txt ADDED
File without changes
sdxl_int4_aot_quantization.ipynb ADDED
@@ -0,0 +1,1485 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "id": "30781904-bd35-473d-9016-1f33ed9f036f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "\n",
14
+ "==================================================\n",
15
+ "[clip] Quantizing...\n"
16
+ ]
17
+ },
18
+ {
19
+ "name": "stderr",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
23
+ " ret = np.divide(tensor_data, scales)\n",
24
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
25
+ " return tensor.astype(qtype)\n"
26
+ ]
27
+ },
28
+ {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "[clip] Size: 469.6 MB -> 66.8 MB (14%)\n",
33
+ "[clip] AOT compiling for SM8850...\n"
34
+ ]
35
+ },
36
+ {
37
+ "data": {
38
+ "application/vnd.jupyter.widget-view+json": {
39
+ "model_id": "d5ab30865c774458b49fb8b7586e314f",
40
+ "version_major": 2,
41
+ "version_minor": 0
42
+ },
43
+ "text/plain": [
44
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
45
+ ]
46
+ },
47
+ "metadata": {},
48
+ "output_type": "display_data"
49
+ },
50
+ {
51
+ "name": "stdout",
52
+ "output_type": "stream",
53
+ "text": [
54
+ "[clip] AOT: Success=1, Failed=0\n",
55
+ "\n",
56
+ "==================================================\n",
57
+ "[open_clip] Quantizing...\n"
58
+ ]
59
+ },
60
+ {
61
+ "name": "stderr",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: divide by zero encountered in divide\n",
65
+ " ret = np.divide(tensor_data, scales)\n"
66
+ ]
67
+ },
68
+ {
69
+ "name": "stdout",
70
+ "output_type": "stream",
71
+ "text": [
72
+ "[open_clip] Size: 2643.7 MB -> 374.0 MB (14%)\n",
73
+ "[open_clip] AOT compiling for SM8850...\n"
74
+ ]
75
+ },
76
+ {
77
+ "data": {
78
+ "application/vnd.jupyter.widget-view+json": {
79
+ "model_id": "0f7cde62157f4eb48338fedab0d8747a",
80
+ "version_major": 2,
81
+ "version_minor": 0
82
+ },
83
+ "text/plain": [
84
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
85
+ ]
86
+ },
87
+ "metadata": {},
88
+ "output_type": "display_data"
89
+ },
90
+ {
91
+ "name": "stdout",
92
+ "output_type": "stream",
93
+ "text": [
94
+ "[open_clip] AOT: Success=1, Failed=0\n",
95
+ "\n",
96
+ "==================================================\n",
97
+ "[diffusion] Quantizing...\n",
98
+ "[diffusion] Size: 9799.6 MB -> 2480.8 MB (25%)\n",
99
+ "[diffusion] AOT compiling for SM8850...\n"
100
+ ]
101
+ },
102
+ {
103
+ "data": {
104
+ "application/vnd.jupyter.widget-view+json": {
105
+ "model_id": "28a7fba90a074601b1834938352c8193",
106
+ "version_major": 2,
107
+ "version_minor": 0
108
+ },
109
+ "text/plain": [
110
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
111
+ ]
112
+ },
113
+ "metadata": {},
114
+ "output_type": "display_data"
115
+ },
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "[diffusion] AOT: Success=1, Failed=0\n",
121
+ "\n",
122
+ "==================================================\n",
123
+ "[decoder] Quantizing...\n",
124
+ "[decoder] Size: 189.2 MB -> 185.8 MB (98%)\n",
125
+ "[decoder] AOT compiling for SM8850...\n"
126
+ ]
127
+ },
128
+ {
129
+ "data": {
130
+ "application/vnd.jupyter.widget-view+json": {
131
+ "model_id": "e382a5c82a6f45a9aece16b94fd090d8",
132
+ "version_major": 2,
133
+ "version_minor": 0
134
+ },
135
+ "text/plain": [
136
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
137
+ ]
138
+ },
139
+ "metadata": {},
140
+ "output_type": "display_data"
141
+ },
142
+ {
143
+ "name": "stdout",
144
+ "output_type": "stream",
145
+ "text": [
146
+ "[decoder] AOT: Success=1, Failed=0\n",
147
+ "\n",
148
+ "==================================================\n",
149
+ "Summary:\n",
150
+ " clip: OK\n",
151
+ " open_clip: OK\n",
152
+ " diffusion: OK\n",
153
+ " decoder: OK\n",
154
+ "\n",
155
+ "Output: /workspace/sdxl_tflite_aot_int4_blockwise_32\n",
156
+ "Files: ['clip_int4.tflite', 'clip_int4_Qualcomm_SM8850_apply_plugin.tflite', 'open_clip_int4.tflite', 'open_clip_int4_Qualcomm_SM8850_apply_plugin.tflite', 'diffusion_int4.tflite', 'diffusion_int4_Qualcomm_SM8850_apply_plugin.tflite', 'decoder_int4.tflite', 'decoder_int4_Qualcomm_SM8850_apply_plugin.tflite']\n"
157
+ ]
158
+ }
159
+ ],
160
+ "source": [
161
+ " import os\n",
162
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
163
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
164
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
165
+ "\n",
166
+ " SRC_DIR = \"/workspace/sdxl_tflite\"\n",
167
+ " OUT_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
168
+ " os.makedirs(OUT_DIR, exist_ok=True)\n",
169
+ "\n",
170
+ " COMPONENTS = [\"clip\", \"open_clip\", \"diffusion\", \"decoder\"]\n",
171
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
172
+ "\n",
173
+ " int4_config = aie_quantizer.qtyping.OpQuantizationConfig(\n",
174
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
175
+ " num_bits=4,\n",
176
+ " symmetric=True,\n",
177
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
178
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
179
+ " ),\n",
180
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
181
+ " explicit_dequantize=False,\n",
182
+ " )\n",
183
+ "\n",
184
+ " results = {}\n",
185
+ "\n",
186
+ " for name in COMPONENTS:\n",
187
+ " src = f\"{SRC_DIR}/{name}.tflite\"\n",
188
+ " int4_path = f\"{OUT_DIR}/{name}_int4.tflite\"\n",
189
+ " print(f\"\\n{'='*50}\")\n",
190
+ " print(f\"[{name}] Quantizing...\")\n",
191
+ "\n",
192
+ " qt = aie_quantizer.Quantizer(src)\n",
193
+ " qt.update_quantization_recipe(\n",
194
+ " regex=\".*\",\n",
195
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
196
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
197
+ " op_config=int4_config,\n",
198
+ " )\n",
199
+ " quant_result = qt.quantize()\n",
200
+ " with open(int4_path, \"wb\") as f:\n",
201
+ " f.write(quant_result.quantized_model)\n",
202
+ "\n",
203
+ " orig_mb = os.path.getsize(src) / 1024 / 1024\n",
204
+ " quant_mb = os.path.getsize(int4_path) / 1024 / 1024\n",
205
+ " print(f\"[{name}] Size: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
206
+ "\n",
207
+ " print(f\"[{name}] AOT compiling for SM8850...\")\n",
208
+ " aot_result = aot_lib.aot_compile(\n",
209
+ " int4_path,\n",
210
+ " output_dir=OUT_DIR,\n",
211
+ " target=[sm8850_target],\n",
212
+ " keep_going=True,\n",
213
+ " )\n",
214
+ "\n",
215
+ " success = len(aot_result.models_with_backend)\n",
216
+ " failed = len(aot_result.failed_backends)\n",
217
+ " results[name] = {\"success\": success, \"failed\": failed}\n",
218
+ " print(f\"[{name}] AOT: Success={success}, Failed={failed}\")\n",
219
+ " for backend, error in aot_result.failed_backends:\n",
220
+ " print(f\"[{name}] Error: {error[:300]}\")\n",
221
+ "\n",
222
+ " print(f\"\\n{'='*50}\")\n",
223
+ " print(\"Summary:\")\n",
224
+ " for name, r in results.items():\n",
225
+ " status = \"OK\" if r[\"success\"] > 0 else \"FAIL\"\n",
226
+ " print(f\" {name}: {status}\")\n",
227
+ " print(f\"\\nOutput: {OUT_DIR}\")\n",
228
+ " print(f\"Files: {os.listdir(OUT_DIR)}\")"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "markdown",
233
+ "id": "ae2cb21d-e31f-464d-96f3-91218650b94f",
234
+ "metadata": {},
235
+ "source": [
236
+ "위에는 int4 blockwise 32 양자화, 아래는 그걸 ai pack로 합치는 코드"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": 4,
242
+ "id": "09cf7c20-3b5c-4eba-aa43-5caee7180cc0",
243
+ "metadata": {
244
+ "scrolled": true
245
+ },
246
+ "outputs": [
247
+ {
248
+ "name": "stdout",
249
+ "output_type": "stream",
250
+ "text": [
251
+ "\n",
252
+ "=== clip ===\n"
253
+ ]
254
+ },
255
+ {
256
+ "data": {
257
+ "application/vnd.jupyter.widget-view+json": {
258
+ "model_id": "83ea5774c8564e3088f1ba34719269f0",
259
+ "version_major": 2,
260
+ "version_minor": 0
261
+ },
262
+ "text/plain": [
263
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
264
+ ]
265
+ },
266
+ "metadata": {},
267
+ "output_type": "display_data"
268
+ },
269
+ {
270
+ "name": "stdout",
271
+ "output_type": "stream",
272
+ "text": [
273
+ " AOT: Success=2 (including fallback)\n",
274
+ " AI Pack exported: sdxl_clip\n",
275
+ "\n",
276
+ "=== open_clip ===\n"
277
+ ]
278
+ },
279
+ {
280
+ "data": {
281
+ "application/vnd.jupyter.widget-view+json": {
282
+ "model_id": "b8b2874d25d743c4b33b09ad1f8b70c3",
283
+ "version_major": 2,
284
+ "version_minor": 0
285
+ },
286
+ "text/plain": [
287
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
288
+ ]
289
+ },
290
+ "metadata": {},
291
+ "output_type": "display_data"
292
+ },
293
+ {
294
+ "name": "stdout",
295
+ "output_type": "stream",
296
+ "text": [
297
+ " AOT: Success=2 (including fallback)\n",
298
+ " AI Pack exported: sdxl_open_clip\n",
299
+ "\n",
300
+ "=== diffusion ===\n"
301
+ ]
302
+ },
303
+ {
304
+ "data": {
305
+ "application/vnd.jupyter.widget-view+json": {
306
+ "model_id": "fe62dc74e729467db4af87dde7bda315",
307
+ "version_major": 2,
308
+ "version_minor": 0
309
+ },
310
+ "text/plain": [
311
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
312
+ ]
313
+ },
314
+ "metadata": {},
315
+ "output_type": "display_data"
316
+ },
317
+ {
318
+ "name": "stdout",
319
+ "output_type": "stream",
320
+ "text": [
321
+ " AOT: Success=2 (including fallback)\n",
322
+ " AI Pack exported: sdxl_diffusion\n",
323
+ "\n",
324
+ "=== decoder ===\n"
325
+ ]
326
+ },
327
+ {
328
+ "data": {
329
+ "application/vnd.jupyter.widget-view+json": {
330
+ "model_id": "829a4709709e462ca8f64b004d6a169a",
331
+ "version_major": 2,
332
+ "version_minor": 0
333
+ },
334
+ "text/plain": [
335
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
336
+ ]
337
+ },
338
+ "metadata": {},
339
+ "output_type": "display_data"
340
+ },
341
+ {
342
+ "name": "stdout",
343
+ "output_type": "stream",
344
+ "text": [
345
+ " AOT: Success=2 (including fallback)\n",
346
+ " AI Pack exported: sdxl_decoder\n",
347
+ "\n",
348
+ "==================================================\n",
349
+ "AI Pack directory: /workspace/sdxl_ai_pack\n",
350
+ "sdxl_ai_pack/\n",
351
+ " device_targeting_configuration.xml\n",
352
+ " sdxl_clip/\n",
353
+ " src/\n",
354
+ " main/\n",
355
+ " assets/\n",
356
+ " model#group_Qualcomm_SM8850/\n",
357
+ " clip.tflite\n",
358
+ " model#group_other/\n",
359
+ " clip.tflite\n",
360
+ " sdxl_clip_mtk/\n",
361
+ " src/\n",
362
+ " main/\n",
363
+ " assets/\n",
364
+ " model#group_Qualcomm_SM8850/\n",
365
+ " placeholder.txt\n",
366
+ " model#group_other/\n",
367
+ " placeholder.txt\n",
368
+ " sdxl_open_clip/\n",
369
+ " src/\n",
370
+ " main/\n",
371
+ " assets/\n",
372
+ " model#group_Qualcomm_SM8850/\n",
373
+ " open_clip.tflite\n",
374
+ " model#group_other/\n",
375
+ " open_clip.tflite\n",
376
+ " sdxl_open_clip_mtk/\n",
377
+ " src/\n",
378
+ " main/\n",
379
+ " assets/\n",
380
+ " model#group_Qualcomm_SM8850/\n",
381
+ " placeholder.txt\n",
382
+ " model#group_other/\n",
383
+ " placeholder.txt\n",
384
+ " sdxl_diffusion/\n",
385
+ " src/\n",
386
+ " main/\n",
387
+ " assets/\n",
388
+ " model#group_Qualcomm_SM8850/\n",
389
+ " diffusion.tflite\n",
390
+ " model#group_other/\n",
391
+ " diffusion.tflite\n",
392
+ " sdxl_diffusion_mtk/\n",
393
+ " src/\n",
394
+ " main/\n",
395
+ " assets/\n",
396
+ " model#group_Qualcomm_SM8850/\n",
397
+ " placeholder.txt\n",
398
+ " model#group_other/\n",
399
+ " placeholder.txt\n",
400
+ " sdxl_decoder/\n",
401
+ " src/\n",
402
+ " main/\n",
403
+ " assets/\n",
404
+ " model#group_Qualcomm_SM8850/\n",
405
+ " decoder.tflite\n",
406
+ " model#group_other/\n",
407
+ " decoder.tflite\n",
408
+ " sdxl_decoder_mtk/\n",
409
+ " src/\n",
410
+ " main/\n",
411
+ " assets/\n",
412
+ " model#group_Qualcomm_SM8850/\n",
413
+ " placeholder.txt\n",
414
+ " model#group_other/\n",
415
+ " placeholder.txt\n"
416
+ ]
417
+ }
418
+ ],
419
+ "source": [
420
+ " import os\n",
421
+ " import pathlib\n",
422
+ " import shutil\n",
423
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
424
+ " from ai_edge_litert.aot.ai_pack import export_lib as ai_pack_export\n",
425
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
426
+ " from ai_edge_litert.aot.core import types\n",
427
+ " from ai_edge_litert.aot.vendors.fallback_backend import FallbackBackend\n",
428
+ "\n",
429
+ " OUT_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
430
+ " AI_PACK_DIR = \"/workspace/sdxl_ai_pack\"\n",
431
+ " COMPONENTS = [\"clip\", \"open_clip\", \"diffusion\", \"decoder\"]\n",
432
+ "\n",
433
+ " shutil.rmtree(AI_PACK_DIR, ignore_errors=True)\n",
434
+ "\n",
435
+ " for name in COMPONENTS:\n",
436
+ " int4_path = f\"{OUT_DIR}/{name}_int4.tflite\"\n",
437
+ " print(f\"\\n=== {name} ===\")\n",
438
+ "\n",
439
+ " # SM8850 AOT 컴파일\n",
440
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
441
+ " result = aot_lib.aot_compile(\n",
442
+ " int4_path,\n",
443
+ " target=[sm8850_target],\n",
444
+ " keep_going=True,\n",
445
+ " )\n",
446
+ "\n",
447
+ " # fallback 모델 추가 (INT4 양자화된 tflite를 CPU/GPU fallback으로 사용)\n",
448
+ " fallback_backend = FallbackBackend.create({\"backend_id\": FallbackBackend.id()})\n",
449
+ " fallback_model = types.Model.create_from_path(pathlib.Path(int4_path))\n",
450
+ " result.models_with_backend.append((fallback_backend, fallback_model))\n",
451
+ "\n",
452
+ " print(f\" AOT: Success={len(result.models_with_backend)} (including fallback)\")\n",
453
+ "\n",
454
+ " # AI Pack export\n",
455
+ " ai_pack_export.export(\n",
456
+ " compiled_models=result,\n",
457
+ " ai_pack_dir=AI_PACK_DIR,\n",
458
+ " ai_pack_name=f\"sdxl_{name}\",\n",
459
+ " litert_model_name=name,\n",
460
+ " )\n",
461
+ " print(f\" AI Pack exported: sdxl_{name}\")\n",
462
+ "\n",
463
+ " # 결과 확인\n",
464
+ " print(f\"\\n{'='*50}\")\n",
465
+ " print(f\"AI Pack directory: {AI_PACK_DIR}\")\n",
466
+ " for root, dirs, files in os.walk(AI_PACK_DIR):\n",
467
+ " level = root.replace(AI_PACK_DIR, '').count(os.sep)\n",
468
+ " indent = ' ' * level\n",
469
+ " print(f\"{indent}{os.path.basename(root)}/\")\n",
470
+ " subindent = ' ' * (level + 1)\n",
471
+ " for f in files:\n",
472
+ " print(f\"{subindent}{f}\")"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "markdown",
477
+ "id": "5cb03868-abfe-46ba-aaf0-7752532626c2",
478
+ "metadata": {},
479
+ "source": [
480
+ "위에는 ai_pack로 패킹하는 코드 아래는 이미지 생성하는 테스트 코드"
481
+ ]
482
+ },
483
+ {
484
+ "cell_type": "code",
485
+ "execution_count": 9,
486
+ "id": "0b8067e3-b1d9-497a-938e-3bac70175efa",
487
+ "metadata": {},
488
+ "outputs": [],
489
+ "source": [
490
+ "\n",
491
+ " # import os\n",
492
+ " # import time\n",
493
+ " # import numpy as np\n",
494
+ " # import torch\n",
495
+ " # from PIL import Image\n",
496
+ " # from tqdm import auto as tqdm\n",
497
+ " # import litert_torch\n",
498
+ " # from ai_edge_litert import interpreter as interpreter_lib\n",
499
+ " # from litert_torch.generative.examples.stable_diffusion import samplers\n",
500
+ " # from litert_torch.generative.examples.stable_diffusion import tokenizer\n",
501
+ " # from litert_torch.generative.examples.stable_diffusion_xl import util\n",
502
+ "\n",
503
+ " # # ============================================================\n",
504
+ " # # 경로 설정\n",
505
+ " # # ============================================================\n",
506
+ " # MODEL_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
507
+ " # SRC_DIR = \"/workspace/sdxl_tflite\" # text_projection.npy 위치\n",
508
+ " # TOKENIZER_DIR = os.path.join(os.path.expanduser(\"~\"), \"stable-diffusion-xl/tokenizer\")\n",
509
+ " # OUTPUT_PATH = \"/workspace/sdxl_int4_generated.png\"\n",
510
+ "\n",
511
+ " # # ============================================================\n",
512
+ " # # 모델 로드\n",
513
+ " # # ============================================================\n",
514
+ " # print(\"Loading INT4 blockwise quantized models...\")\n",
515
+ "\n",
516
+ " # clip = litert_torch.load(f\"{MODEL_DIR}/clip_int4.tflite\")\n",
517
+ " # open_clip = litert_torch.load(f\"{MODEL_DIR}/open_clip_int4.tflite\")\n",
518
+ " # diffusion = litert_torch.load(f\"{MODEL_DIR}/diffusion_int4.tflite\")\n",
519
+ " # decoder = litert_torch.load(f\"{MODEL_DIR}/decoder_int4.tflite\")\n",
520
+ " # text_projection = np.load(f\"{SRC_DIR}/text_projection.npy\")\n",
521
+ " # tok = tokenizer.Tokenizer(TOKENIZER_DIR)\n",
522
+ "\n",
523
+ " # num_threads = os.cpu_count()\n",
524
+ " # for model in [clip, open_clip, diffusion, decoder]:\n",
525
+ " # model_bytes = model.model_content()\n",
526
+ " # model.set_interpreter_builder(\n",
527
+ " # lambda b=model_bytes: interpreter_lib.Interpreter(\n",
528
+ " # model_content=b,\n",
529
+ " # experimental_default_delegate_latest_features=False,\n",
530
+ " # num_threads=num_threads,\n",
531
+ " # )\n",
532
+ " # )\n",
533
+ " # print(f\"All models loaded. (num_threads={num_threads})\")\n",
534
+ "\n",
535
+ " # # ============================================================\n",
536
+ " # # 생성 파라미터\n",
537
+ " # # ============================================================\n",
538
+ " # PROMPT = \"a photograph of an astronaut riding a horse on the moon, high quality, detailed\"\n",
539
+ " # NEGATIVE_PROMPT = \"blurry, low quality\"\n",
540
+ " # CFG_SCALE = 7.5\n",
541
+ " # HEIGHT = 1024\n",
542
+ " # WIDTH = 1024\n",
543
+ " # N_STEPS = 20\n",
544
+ " # SEED = 42\n",
545
+ " # SAMPLER_NAME = \"k_euler\"\n",
546
+ "\n",
547
+ " # # ============================================================\n",
548
+ " # # 이미지 생성\n",
549
+ " # # ============================================================\n",
550
+ " # np.random.seed(SEED)\n",
551
+ " # start_time = time.time()\n",
552
+ "\n",
553
+ " # # 1. Sampler\n",
554
+ " # if SAMPLER_NAME == \"k_euler\":\n",
555
+ " # sampler = samplers.KEulerSampler(n_inference_steps=N_STEPS)\n",
556
+ " # elif SAMPLER_NAME == \"k_euler_ancestral\":\n",
557
+ " # sampler = samplers.KEulerAncestralSampler(n_inference_steps=N_STEPS)\n",
558
+ " # elif SAMPLER_NAME == \"k_lms\":\n",
559
+ " # sampler = samplers.KLMSSampler(n_inference_steps=N_STEPS)\n",
560
+ "\n",
561
+ " # # 2. Dual text encoding\n",
562
+ " # print(\"Step 1: Dual text encoding (CLIP-L + OpenCLIP-G)...\")\n",
563
+ " # cond_tokens = np.array(tok.encode(PROMPT)).astype(np.int32)\n",
564
+ " # uncond_tokens = np.array(tok.encode(NEGATIVE_PROMPT)).astype(np.int32)\n",
565
+ "\n",
566
+ " # cond_clip = clip(cond_tokens, signature_name=\"encode\")\n",
567
+ " # uncond_clip = clip(uncond_tokens, signature_name=\"encode\")\n",
568
+ "\n",
569
+ " # cond_open_hidden, cond_final = open_clip(cond_tokens, signature_name=\"encode\")\n",
570
+ " # uncond_open_hidden, uncond_final = open_clip(uncond_tokens, signature_name=\"encode\")\n",
571
+ "\n",
572
+ " # # EOS pooling + text_projection\n",
573
+ " # cond_eos_idx = int(np.argmax(cond_tokens))\n",
574
+ " # uncond_eos_idx = int(np.argmax(uncond_tokens))\n",
575
+ " # cond_pooled = (cond_final[0, cond_eos_idx] @ text_projection)[np.newaxis, :]\n",
576
+ " # uncond_pooled = (uncond_final[0, uncond_eos_idx] @ text_projection)[np.newaxis, :]\n",
577
+ "\n",
578
+ " # cond_context = np.concatenate([cond_clip, cond_open_hidden], axis=-1)\n",
579
+ " # uncond_context = np.concatenate([uncond_clip, uncond_open_hidden], axis=-1)\n",
580
+ " # context = np.concatenate([cond_context, uncond_context], axis=0)\n",
581
+ "\n",
582
+ " # # 3. Add embedding\n",
583
+ " # print(\"Step 2: Computing add_embedding...\")\n",
584
+ " # add_time_ids = util.get_add_time_ids(\n",
585
+ " # original_size=(HEIGHT, WIDTH),\n",
586
+ " # crop_coords=(0, 0),\n",
587
+ " # target_size=(HEIGHT, WIDTH),\n",
588
+ " # )\n",
589
+ " # time_ids_emb = util.encode_add_time_ids(add_time_ids).numpy()\n",
590
+ " # cond_add_emb = np.concatenate([cond_pooled, time_ids_emb], axis=-1)\n",
591
+ " # uncond_add_emb = np.concatenate([uncond_pooled, time_ids_emb], axis=-1)\n",
592
+ " # add_emb = np.concatenate([cond_add_emb, uncond_add_emb], axis=0).astype(np.float32)\n",
593
+ "\n",
594
+ " # # 4. Initialize latents\n",
595
+ " # print(\"Step 3: Initializing latents...\")\n",
596
+ " # noise_shape = (1, 4, HEIGHT // 8, WIDTH // 8)\n",
597
+ " # latents = np.random.normal(size=noise_shape).astype(np.float32)\n",
598
+ " # latents *= sampler.initial_scale\n",
599
+ "\n",
600
+ " # # 5. Diffusion loop\n",
601
+ " # print(f\"Step 4: Diffusion ({N_STEPS} steps)...\")\n",
602
+ " # timesteps = tqdm.tqdm(sampler.timesteps, desc=\"Denoising\")\n",
603
+ " # for _, timestep in enumerate(timesteps):\n",
604
+ " # time_embedding = util.get_time_embedding(timestep).numpy()\n",
605
+ " # input_latents = latents * sampler.get_input_scale()\n",
606
+ " # input_latents = input_latents.repeat(2, axis=0)\n",
607
+ "\n",
608
+ " # output = diffusion(\n",
609
+ " # input_latents.astype(np.float32),\n",
610
+ " # context.astype(np.float32),\n",
611
+ " # time_embedding.astype(np.float32),\n",
612
+ " # add_emb,\n",
613
+ " # signature_name=\"diffusion\",\n",
614
+ " # )\n",
615
+ " # output_cond, output_uncond = np.split(output, 2, axis=0)\n",
616
+ " # output = CFG_SCALE * (output_cond - output_uncond) + output_uncond\n",
617
+ " # latents = sampler.step(latents, output)\n",
618
+ "\n",
619
+ " # # 6. Decode\n",
620
+ " # print(\"Step 5: Decoding to image...\")\n",
621
+ " # images = decoder(latents.astype(np.float32), signature_name=\"decode\")\n",
622
+ " # images = util.rescale(images, (-1, 1), (0, 255), clamp=True)\n",
623
+ " # images = util.move_channel(images, to=\"last\")\n",
624
+ "\n",
625
+ " # result_image = Image.fromarray(images[0].astype(np.uint8))\n",
626
+ " # result_image.save(OUTPUT_PATH)\n",
627
+ "\n",
628
+ " # total_time = time.time() - start_time\n",
629
+ " # print(f\"\\nDone! Total time: {total_time:.1f}s\")\n",
630
+ " # print(f\"Image saved to: {OUTPUT_PATH}\")\n",
631
+ " # result_image"
632
+ ]
633
+ },
634
+ {
635
+ "cell_type": "code",
636
+ "execution_count": 10,
637
+ "id": "5f173a79-5fe0-47d9-ba16-502fb3ce1ddd",
638
+ "metadata": {},
639
+ "outputs": [
640
+ {
641
+ "name": "stdout",
642
+ "output_type": "stream",
643
+ "text": [
644
+ "Loading models...\n",
645
+ " clip loaded\n",
646
+ " open_clip loaded\n",
647
+ " diffusion loaded\n",
648
+ " decoder loaded\n",
649
+ "All models loaded!\n"
650
+ ]
651
+ }
652
+ ],
653
+ "source": [
654
+ " from ai_edge_litert import interpreter as interpreter_lib\n",
655
+ " import numpy as np\n",
656
+ "\n",
657
+ " MODEL_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
658
+ " num_threads = 32\n",
659
+ "\n",
660
+ " def load_model(path):\n",
661
+ " return interpreter_lib.Interpreter(\n",
662
+ " model_path=path,\n",
663
+ " num_threads=num_threads,\n",
664
+ " experimental_op_resolver_type=interpreter_lib.OpResolverType.BUILTIN_WITHOUT_DEFAULT_DELEGATES,\n",
665
+ " )\n",
666
+ "\n",
667
+ " def run_model(interp, inputs, signature_name):\n",
668
+ " runner = interp.get_signature_runner(signature_name)\n",
669
+ " # input key 이름 확인\n",
670
+ " input_details = runner.get_input_details()\n",
671
+ " output_details = runner.get_output_details()\n",
672
+ "\n",
673
+ " feed = {}\n",
674
+ " input_keys = list(input_details.keys())\n",
675
+ " for i, key in enumerate(input_keys):\n",
676
+ " feed[key] = inputs[i] if isinstance(inputs, (list, tuple)) else inputs\n",
677
+ "\n",
678
+ " result = runner(**feed)\n",
679
+ " output_keys = list(output_details.keys())\n",
680
+ " if len(output_keys) == 1:\n",
681
+ " return result[output_keys[0]]\n",
682
+ " return tuple(result[k] for k in output_keys)\n",
683
+ "\n",
684
+ " # 모델 로드\n",
685
+ " print(\"Loading models...\")\n",
686
+ " clip_interp = load_model(f\"{MODEL_DIR}/clip_int4.tflite\")\n",
687
+ " clip_interp.allocate_tensors()\n",
688
+ " print(\" clip loaded\")\n",
689
+ "\n",
690
+ " open_clip_interp = load_model(f\"{MODEL_DIR}/open_clip_int4.tflite\")\n",
691
+ " open_clip_interp.allocate_tensors()\n",
692
+ " print(\" open_clip loaded\")\n",
693
+ "\n",
694
+ " diffusion_interp = load_model(f\"{MODEL_DIR}/diffusion_int4.tflite\")\n",
695
+ " diffusion_interp.allocate_tensors()\n",
696
+ " print(\" diffusion loaded\")\n",
697
+ "\n",
698
+ " decoder_interp = load_model(f\"{MODEL_DIR}/decoder_int4.tflite\")\n",
699
+ " decoder_interp.allocate_tensors()\n",
700
+ " print(\" decoder loaded\")\n",
701
+ "\n",
702
+ " print(\"All models loaded!\")"
703
+ ]
704
+ },
705
+ {
706
+ "cell_type": "code",
707
+ "execution_count": 11,
708
+ "id": "be818314-4a91-48c9-a0db-3d4aee94e10b",
709
+ "metadata": {},
710
+ "outputs": [
711
+ {
712
+ "name": "stdout",
713
+ "output_type": "stream",
714
+ "text": [
715
+ "\n",
716
+ "=== clip ===\n",
717
+ " signature: encode\n",
718
+ " inputs: [('args_0', array([ 1, 77], dtype=int32), <class 'numpy.int32'>)]\n",
719
+ " outputs: [('output_0', array([ 1, 77, 768], dtype=int32), <class 'numpy.float32'>)]\n",
720
+ "\n",
721
+ "=== open_clip ===\n",
722
+ " signature: encode\n",
723
+ " inputs: [('args_0', array([ 1, 77], dtype=int32), <class 'numpy.int32'>)]\n",
724
+ " outputs: [('output_0', array([ 1, 77, 1280], dtype=int32), <class 'numpy.float32'>), ('output_1', array([ 1, 77, 1280], dtype=int32), <class 'numpy.float32'>)]\n",
725
+ "\n",
726
+ "=== diffusion ===\n",
727
+ " signature: diffusion\n",
728
+ " inputs: [('args_0', array([ 2, 4, 128, 128], dtype=int32), <class 'numpy.float32'>), ('args_1', array([ 2, 77, 2048], dtype=int32), <class 'numpy.float32'>), ('args_2', array([ 1, 320], dtype=int32), <class 'numpy.float32'>), ('args_3', array([ 2, 2816], dtype=int32), <class 'numpy.float32'>)]\n",
729
+ " outputs: [('output_0', array([ 2, 4, 128, 128], dtype=int32), <class 'numpy.float32'>)]\n",
730
+ "\n",
731
+ "=== decoder ===\n",
732
+ " signature: decode\n",
733
+ " inputs: [('args_0', array([ 1, 4, 128, 128], dtype=int32), <class 'numpy.float32'>)]\n",
734
+ " outputs: [('output_0', array([ 1, 3, 1024, 1024], dtype=int32), <class 'numpy.float32'>)]\n"
735
+ ]
736
+ }
737
+ ],
738
+ "source": [
739
+ " for name, interp in [(\"clip\", clip_interp), (\"open_clip\", open_clip_interp),\n",
740
+ " (\"diffusion\", diffusion_interp), (\"decoder\", decoder_interp)]:\n",
741
+ " sigs = interp.get_signature_list()\n",
742
+ " print(f\"\\n=== {name} ===\")\n",
743
+ " for sig_name in sigs:\n",
744
+ " runner = interp.get_signature_runner(sig_name)\n",
745
+ " inputs = runner.get_input_details()\n",
746
+ " outputs = runner.get_output_details()\n",
747
+ " print(f\" signature: {sig_name}\")\n",
748
+ " print(f\" inputs: {[(k, v['shape'], v['dtype']) for k, v in inputs.items()]}\")\n",
749
+ " print(f\" outputs: {[(k, v['shape'], v['dtype']) for k, v in outputs.items()]}\")"
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": null,
755
+ "id": "1cf4bb8d-19d3-4a66-bbcf-f1f135bf9944",
756
+ "metadata": {},
757
+ "outputs": [
758
+ {
759
+ "name": "stdout",
760
+ "output_type": "stream",
761
+ "text": [
762
+ "Step 1: Dual text encoding...\n",
763
+ "Step 2: Computing add_embedding...\n",
764
+ "Step 3: Initializing latents...\n",
765
+ "Step 4: Diffusion (20 steps)...\n"
766
+ ]
767
+ },
768
+ {
769
+ "data": {
770
+ "application/vnd.jupyter.widget-view+json": {
771
+ "model_id": "e7a7f1c5949a4ea1bbb200c17f624850",
772
+ "version_major": 2,
773
+ "version_minor": 0
774
+ },
775
+ "text/plain": [
776
+ "Denoising: 0%| | 0/20 [00:00<?, ?it/s]"
777
+ ]
778
+ },
779
+ "metadata": {},
780
+ "output_type": "display_data"
781
+ }
782
+ ],
783
+ "source": [
784
+ " import os\n",
785
+ " import time\n",
786
+ " import numpy as np\n",
787
+ " import torch\n",
788
+ " from PIL import Image\n",
789
+ " from tqdm import auto as tqdm\n",
790
+ " from litert_torch.generative.examples.stable_diffusion import samplers\n",
791
+ " from litert_torch.generative.examples.stable_diffusion import tokenizer\n",
792
+ " from litert_torch.generative.examples.stable_diffusion_xl import util\n",
793
+ "\n",
794
+ " # ============================================================\n",
795
+ " # 설정\n",
796
+ " # ============================================================\n",
797
+ " SRC_DIR = \"/workspace/sdxl_tflite\"\n",
798
+ " TOKENIZER_DIR = os.path.join(os.path.expanduser(\"~\"), \"stable-diffusion-xl/tokenizer\")\n",
799
+ " OUTPUT_PATH = \"/workspace/sdxl_int4_generated.png\"\n",
800
+ "\n",
801
+ " text_projection = np.load(f\"{SRC_DIR}/text_projection.npy\")\n",
802
+ " tok = tokenizer.Tokenizer(TOKENIZER_DIR)\n",
803
+ "\n",
804
+ " PROMPT = \"a photograph of an astronaut riding a horse on the moon, high quality, detailed\"\n",
805
+ " NEGATIVE_PROMPT = \"blurry, low quality\"\n",
806
+ " CFG_SCALE = 7.5\n",
807
+ " HEIGHT, WIDTH = 1024, 1024\n",
808
+ " N_STEPS = 20\n",
809
+ " SEED = 42\n",
810
+ "\n",
811
+ " # ============================================================\n",
812
+ " # 헬퍼\n",
813
+ " # ============================================================\n",
814
+ " def run_signature(interp, sig_name, *args):\n",
815
+ " runner = interp.get_signature_runner(sig_name)\n",
816
+ " input_details = runner.get_input_details()\n",
817
+ " input_keys = list(input_details.keys())\n",
818
+ " feed = {input_keys[i]: args[i] for i in range(len(args))}\n",
819
+ " result = runner(**feed)\n",
820
+ " output_keys = list(result.keys())\n",
821
+ " if len(output_keys) == 1:\n",
822
+ " return result[output_keys[0]]\n",
823
+ " return tuple(result[k] for k in output_keys)\n",
824
+ "\n",
825
+ " # ============================================================\n",
826
+ " # 생성\n",
827
+ " # ============================================================\n",
828
+ " np.random.seed(SEED)\n",
829
+ " start_time = time.time()\n",
830
+ "\n",
831
+ " sampler = samplers.KEulerSampler(n_inference_steps=N_STEPS)\n",
832
+ "\n",
833
+ " # 1. Text encoding\n",
834
+ " print(\"Step 1: Dual text encoding...\")\n",
835
+ " cond_tokens = np.array(tok.encode(PROMPT)).astype(np.int32)\n",
836
+ " uncond_tokens = np.array(tok.encode(NEGATIVE_PROMPT)).astype(np.int32)\n",
837
+ "\n",
838
+ " cond_clip = run_signature(clip_interp, \"encode\", cond_tokens)\n",
839
+ " uncond_clip = run_signature(clip_interp, \"encode\", uncond_tokens)\n",
840
+ "\n",
841
+ " cond_open_hidden, cond_final = run_signature(open_clip_interp, \"encode\", cond_tokens)\n",
842
+ " uncond_open_hidden, uncond_final = run_signature(open_clip_interp, \"encode\", uncond_tokens)\n",
843
+ "\n",
844
+ " # EOS pooling + text_projection\n",
845
+ " cond_eos_idx = int(np.argmax(cond_tokens))\n",
846
+ " uncond_eos_idx = int(np.argmax(uncond_tokens))\n",
847
+ " cond_pooled = (cond_final[0, cond_eos_idx] @ text_projection)[np.newaxis, :]\n",
848
+ " uncond_pooled = (uncond_final[0, uncond_eos_idx] @ text_projection)[np.newaxis, :]\n",
849
+ "\n",
850
+ " cond_context = np.concatenate([cond_clip, cond_open_hidden], axis=-1)\n",
851
+ " uncond_context = np.concatenate([uncond_clip, uncond_open_hidden], axis=-1)\n",
852
+ " context = np.concatenate([cond_context, uncond_context], axis=0)\n",
853
+ "\n",
854
+ " # 2. Add embedding\n",
855
+ " print(\"Step 2: Computing add_embedding...\")\n",
856
+ " add_time_ids = util.get_add_time_ids(\n",
857
+ " original_size=(HEIGHT, WIDTH), crop_coords=(0, 0), target_size=(HEIGHT, WIDTH),\n",
858
+ " )\n",
859
+ " time_ids_emb = util.encode_add_time_ids(add_time_ids).numpy()\n",
860
+ " cond_add_emb = np.concatenate([cond_pooled, time_ids_emb], axis=-1)\n",
861
+ " uncond_add_emb = np.concatenate([uncond_pooled, time_ids_emb], axis=-1)\n",
862
+ " add_emb = np.concatenate([cond_add_emb, uncond_add_emb], axis=0).astype(np.float32)\n",
863
+ "\n",
864
+ " # 3. Initialize latents\n",
865
+ " print(\"Step 3: Initializing latents...\")\n",
866
+ " latents = np.random.normal(size=(1, 4, HEIGHT // 8, WIDTH // 8)).astype(np.float32)\n",
867
+ " latents *= sampler.initial_scale\n",
868
+ "\n",
869
+ " # 4. Diffusion loop\n",
870
+ " print(f\"Step 4: Diffusion ({N_STEPS} steps)...\")\n",
871
+ " for i, timestep in enumerate(tqdm.tqdm(sampler.timesteps, desc=\"Denoising\")):\n",
872
+ " time_embedding = util.get_time_embedding(timestep).numpy()\n",
873
+ " input_latents = latents * sampler.get_input_scale()\n",
874
+ " input_latents = input_latents.repeat(2, axis=0)\n",
875
+ "\n",
876
+ " output = run_signature(\n",
877
+ " diffusion_interp, \"diffusion\",\n",
878
+ " input_latents.astype(np.float32),\n",
879
+ " context.astype(np.float32),\n",
880
+ " time_embedding.astype(np.float32),\n",
881
+ " add_emb,\n",
882
+ " )\n",
883
+ " output_cond, output_uncond = np.split(output, 2, axis=0)\n",
884
+ " output = CFG_SCALE * (output_cond - output_uncond) + output_uncond\n",
885
+ " latents = sampler.step(latents, output)\n",
886
+ "\n",
887
+ " # 5. Decode\n",
888
+ " print(\"Step 5: Decoding to image...\")\n",
889
+ " images = run_signature(decoder_interp, \"decode\", latents.astype(np.float32))\n",
890
+ " images = util.rescale(images, (-1, 1), (0, 255), clamp=True)\n",
891
+ " images = util.move_channel(images, to=\"last\")\n",
892
+ "\n",
893
+ " result_image = Image.fromarray(images[0].astype(np.uint8))\n",
894
+ " result_image.save(OUTPUT_PATH)\n",
895
+ "\n",
896
+ " total_time = time.time() - start_time\n",
897
+ " print(f\"\\nDone! Total time: {total_time:.1f}s\")\n",
898
+ " print(f\"Image saved to: {OUTPUT_PATH}\")\n",
899
+ " result_image"
900
+ ]
901
+ },
902
+ {
903
+ "cell_type": "markdown",
904
+ "id": "3936e5c5-ceeb-4cc3-819c-702a38a89bf0",
905
+ "metadata": {},
906
+ "source": [
907
+ "# AOT Conversion Code\n",
908
+ "\n",
909
+ "위는 완성된 코드, 아래는 테스트용 잡다한거"
910
+ ]
911
+ },
912
+ {
913
+ "cell_type": "code",
914
+ "execution_count": 2,
915
+ "id": "a87c3a83-7811-465e-9079-bced67cbb82e",
916
+ "metadata": {},
917
+ "outputs": [
918
+ {
919
+ "name": "stdout",
920
+ "output_type": "stream",
921
+ "text": [
922
+ "['Quantizer', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'algorithm_manager', 'algorithm_manager_api', 'algorithms', 'calibrator', 'default_policy', 'model_modifier', 'model_validator', 'params_generator', 'qtyping', 'quantizer', 'recipe', 'recipe_manager', 'transformation_instruction_generator', 'transformation_performer', 'transformations', 'utils']\n",
923
+ "['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__']\n"
924
+ ]
925
+ }
926
+ ],
927
+ "source": [
928
+ " # ai_edge_quantizer에 AieQuantizerT 구현체가 있는지 확인\n",
929
+ " import ai_edge_quantizer\n",
930
+ " print(dir(ai_edge_quantizer))\n",
931
+ "\n",
932
+ " # 혹시 ai_edge_litert.aot 쪽에 있는지도 확인\n",
933
+ " import ai_edge_litert.aot\n",
934
+ " print(dir(ai_edge_litert.aot))\n",
935
+ "\n",
936
+ " # components에서 import 가능한 quantizer 구현체 확인\n",
937
+ " from ai_edge_litert.aot.core import components\n",
938
+ " import inspect\n",
939
+ " for name, obj in inspect.getmembers(components):\n",
940
+ " if inspect.isclass(obj) and issubclass(obj, components.AieQuantizerT) and obj != components.AieQuantizerT:\n",
941
+ " print(f\"Found: {name}\")"
942
+ ]
943
+ },
944
+ {
945
+ "cell_type": "code",
946
+ "execution_count": 4,
947
+ "id": "b7f7c046-e555-417d-b877-60cf9845d2dc",
948
+ "metadata": {},
949
+ "outputs": [
950
+ {
951
+ "name": "stderr",
952
+ "output_type": "stream",
953
+ "text": [
954
+ "/usr/local/lib/python3.12/dist-packages/torch/distributed/distributed_c10d.py:351: UserWarning: Device capability of jax unspecified, assuming `cpu` and `cuda`. Please specify it via the `devices` argument of `register_backend`.\n",
955
+ " warnings.warn(\n",
956
+ "An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n",
957
+ "Skipping import of cpp extensions due to incompatible torch version 2.8.0+cu128 for torchao version 0.16.0 Please see https://github.com/pytorch/ao/issues/2919 for more info\n"
958
+ ]
959
+ },
960
+ {
961
+ "name": "stdout",
962
+ "output_type": "stream",
963
+ "text": [
964
+ "INT4 BLOCKWISE_32 verify passed: QuantConfig(pt2e_quantizer=None, generative_recipe=GenerativeQuantRecipe(\n",
965
+ " Default: (a:FP32, w:INT4, DYNAMIC_RANGE, MIN_MAX, BLOCKWISE_32, )\n",
966
+ " Embedding: None\n",
967
+ " Attention: None\n",
968
+ " Feedforward: None\n",
969
+ "), _quantizer_mode=<_QuantizerMode.AI_EDGE_QUANTIZER: 4>)\n"
970
+ ]
971
+ }
972
+ ],
973
+ "source": [
974
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
975
+ "\n",
976
+ " # INT4 BLOCKWISE_32 (DYNAMIC_RANGE 모드)\n",
977
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
978
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
979
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
980
+ " )\n",
981
+ " print(\"INT4 BLOCKWISE_32 verify passed:\", quant_config)\n"
982
+ ]
983
+ },
984
+ {
985
+ "cell_type": "code",
986
+ "execution_count": 5,
987
+ "id": "e8c3a18d-09fa-4c3f-8e92-83937f9a97de",
988
+ "metadata": {},
989
+ "outputs": [
990
+ {
991
+ "name": "stdout",
992
+ "output_type": "stream",
993
+ "text": [
994
+ "QuantConfig(pt2e_quantizer=None, generative_recipe=GenerativeQuantRecipe(\n",
995
+ " Default: (a:FP32, w:INT4, DYNAMIC_RANGE, MIN_MAX, BLOCKWISE_32, )\n",
996
+ " Embedding: None\n",
997
+ " Attention: None\n",
998
+ " Feedforward: None\n",
999
+ "), _quantizer_mode=<_QuantizerMode.AI_EDGE_QUANTIZER: 4>)\n"
1000
+ ]
1001
+ }
1002
+ ],
1003
+ "source": [
1004
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
1005
+ "\n",
1006
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
1007
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
1008
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
1009
+ " )\n",
1010
+ " print(quant_config)"
1011
+ ]
1012
+ },
1013
+ {
1014
+ "cell_type": "code",
1015
+ "execution_count": 6,
1016
+ "id": "4619e935-a84e-4c01-ac96-ede6988f7e52",
1017
+ "metadata": {},
1018
+ "outputs": [
1019
+ {
1020
+ "ename": "NameError",
1021
+ "evalue": "name 'clip_loaded' is not defined",
1022
+ "output_type": "error",
1023
+ "traceback": [
1024
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
1025
+ "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
1026
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 19\u001b[39m\n\u001b[32m 14\u001b[39m os.makedirs(TEST_DIR, exist_ok=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m 16\u001b[39m prompt_tokens = torch.full((\u001b[32m1\u001b[39m, \u001b[32m77\u001b[39m), \u001b[32m0\u001b[39m, dtype=torch.int) \u001b[38;5;66;03m# N_TOKENS=77\u001b[39;00m\n\u001b[32m 18\u001b[39m litert_torch.signature(\n\u001b[32m---> \u001b[39m\u001b[32m19\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mencode\u001b[39m\u001b[33m\"\u001b[39m, \u001b[43mclip_loaded\u001b[49m, (prompt_tokens,)\n\u001b[32m 20\u001b[39m ).convert(quant_config=quant_config).export(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mTEST_DIR\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/clip_int4.tflite\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 21\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mINT4 clip conversion done\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 23\u001b[39m \u001b[38;5;66;03m# AOT 컴파일 테스트\u001b[39;00m\n",
1027
+ "\u001b[31mNameError\u001b[39m: name 'clip_loaded' is not defined"
1028
+ ]
1029
+ }
1030
+ ],
1031
+ "source": [
1032
+ " import os\n",
1033
+ " import torch\n",
1034
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
1035
+ " import litert_torch\n",
1036
+ "\n",
1037
+ " # INT4 BLOCKWISE_32 config\n",
1038
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
1039
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
1040
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
1041
+ " )\n",
1042
+ "\n",
1043
+ " # clip 모델만 변환 (가장 가벼움)\n",
1044
+ " TEST_DIR = \"/tmp/sdxl_int4_test\"\n",
1045
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
1046
+ "\n",
1047
+ " prompt_tokens = torch.full((1, 77), 0, dtype=torch.int) # N_TOKENS=77\n",
1048
+ "\n",
1049
+ " litert_torch.signature(\n",
1050
+ " \"encode\", clip_loaded, (prompt_tokens,)\n",
1051
+ " ).convert(quant_config=quant_config).export(f\"{TEST_DIR}/clip_int4.tflite\")\n",
1052
+ " print(\"INT4 clip conversion done\")\n",
1053
+ "\n",
1054
+ " # AOT 컴파일 테스트\n",
1055
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
1056
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
1057
+ "\n",
1058
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
1059
+ " result = aot_lib.aot_compile(\n",
1060
+ " f\"{TEST_DIR}/clip_int4.tflite\",\n",
1061
+ " target=[sm8850_target],\n",
1062
+ " keep_going=True,\n",
1063
+ " )\n",
1064
+ "\n",
1065
+ " print(f\"Success: {len(result.models_with_backend)}\")\n",
1066
+ " print(f\"Failed: {len(result.failed_backends)}\")\n",
1067
+ " for backend, error in result.failed_backends:\n",
1068
+ " print(f\" {backend.target}: {error[:200]}\")"
1069
+ ]
1070
+ },
1071
+ {
1072
+ "cell_type": "code",
1073
+ "execution_count": 7,
1074
+ "id": "429c16cb-c8bf-4d66-ae08-e7fb0b35d0b3",
1075
+ "metadata": {},
1076
+ "outputs": [
1077
+ {
1078
+ "name": "stdout",
1079
+ "output_type": "stream",
1080
+ "text": [
1081
+ "/tmp/sdxl_tflite: ['clip.tflite', 'open_clip.tflite', 'diffusion.tflite', 'decoder.tflite', 'text_projection.npy', 'generated_image.jpg', 'playground_00.png', 'playground_01.png', 'playground_02.png', 'playground_03.png', 'playground_grid.png', 'cfg_2.0.png', 'cfg_7.5.png', 'cfg_15.0.png', 'cfg_comparison.png', 'sampler_k_euler.png', 'sampler_k_euler_ancestral.png', 'sampler_k_lms.png', 'sampler_comparison.png']\n",
1082
+ "/workspace/sdxl_tflite: ['open_clip.tflite', 'diffusion.tflite', 'decoder.tflite', 'text_projection.npy', 'generated_image.jpg', 'playground_00.png', 'playground_01.png', 'playground_02.png', 'playground_03.png', 'playground_grid.png', 'cfg_2.0.png', 'cfg_7.5.png', 'cfg_15.0.png', 'cfg_comparison.png', 'sampler_k_euler.png', 'sampler_k_euler_ancestral.png', 'sampler_k_lms.png', 'sampler_comparison.png', 'clip.tflite', '.ipynb_checkpoints']\n"
1083
+ ]
1084
+ }
1085
+ ],
1086
+ "source": [
1087
+ " import os\n",
1088
+ " # 이전 변환 결과가 남아있는지 확인\n",
1089
+ " for d in [\"/tmp/sdxl_tflite\", \"/tmp/sdxl_tflite_quantized\", \"/workspace/sdxl_tflite\"]:\n",
1090
+ " if os.path.exists(d):\n",
1091
+ " files = os.listdir(d)\n",
1092
+ " print(f\"{d}: {files}\")"
1093
+ ]
1094
+ },
1095
+ {
1096
+ "cell_type": "code",
1097
+ "execution_count": 9,
1098
+ "id": "70087fe5-d3e8-4bf2-ba1c-92c128d88a0c",
1099
+ "metadata": {},
1100
+ "outputs": [
1101
+ {
1102
+ "name": "stderr",
1103
+ "output_type": "stream",
1104
+ "text": [
1105
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
1106
+ " ret = np.divide(tensor_data, scales)\n",
1107
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
1108
+ " return tensor.astype(qtype)\n"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "name": "stdout",
1113
+ "output_type": "stream",
1114
+ "text": [
1115
+ "INT4 blockwise clip: 66.8 MB\n"
1116
+ ]
1117
+ },
1118
+ {
1119
+ "data": {
1120
+ "application/vnd.jupyter.widget-view+json": {
1121
+ "model_id": "27fdf6c884d9490f8909bf33a088a1c4",
1122
+ "version_major": 2,
1123
+ "version_minor": 0
1124
+ },
1125
+ "text/plain": [
1126
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
1127
+ ]
1128
+ },
1129
+ "metadata": {},
1130
+ "output_type": "display_data"
1131
+ },
1132
+ {
1133
+ "name": "stdout",
1134
+ "output_type": "stream",
1135
+ "text": [
1136
+ "Success: 1\n",
1137
+ "Failed: 0\n"
1138
+ ]
1139
+ }
1140
+ ],
1141
+ "source": [
1142
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
1143
+ " from ai_edge_quantizer import recipe as aie_recipe\n",
1144
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
1145
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
1146
+ " import os\n",
1147
+ "\n",
1148
+ " # 1. FP32 clip을 INT4 blockwise로 양자화\n",
1149
+ " SRC = \"/tmp/sdxl_tflite/clip.tflite\"\n",
1150
+ " TEST_DIR = \"/tmp/sdxl_int4_test\"\n",
1151
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
1152
+ "\n",
1153
+ " qt.update_quantization_recipe(\n",
1154
+ " regex=\".*\",\n",
1155
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1156
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1157
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1158
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1159
+ " num_bits=4,\n",
1160
+ " symmetric=True,\n",
1161
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1162
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1163
+ " ),\n",
1164
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1165
+ " explicit_dequantize=False,\n",
1166
+ " ),\n",
1167
+ " )\n",
1168
+ " result = qt.quantize()\n",
1169
+ " int4_path = f\"{TEST_DIR}/clip_int4.tflite\"\n",
1170
+ " with open(int4_path, \"wb\") as f:\n",
1171
+ " f.write(result.quantized_model)\n",
1172
+ " print(f\"INT4 blockwise clip: {os.path.getsize(int4_path) / 1024 / 1024:.1f} MB\")\n",
1173
+ "\n",
1174
+ " # AOT 컴파일 테스트\n",
1175
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
1176
+ " aot_result = aot_lib.aot_compile(\n",
1177
+ " int4_path,\n",
1178
+ " target=[sm8850_target],\n",
1179
+ " keep_going=True,\n",
1180
+ " )\n",
1181
+ " print(f\"Success: {len(aot_result.models_with_backend)}\")\n",
1182
+ " print(f\"Failed: {len(aot_result.failed_backends)}\")\n",
1183
+ " for backend, error in aot_result.failed_backends:\n",
1184
+ " print(f\" Error: {error[:300]}\")"
1185
+ ]
1186
+ },
1187
+ {
1188
+ "cell_type": "code",
1189
+ "execution_count": null,
1190
+ "id": "c9974fa3-8be4-4110-9194-892d05219ec7",
1191
+ "metadata": {},
1192
+ "outputs": [],
1193
+ "source": [
1194
+ " COMPONENTS = [\"open_clip\", \"decoder\", \"diffusion\"]\n",
1195
+ " SRC_DIR = \"/tmp/sdxl_tflite\"\n",
1196
+ "\n",
1197
+ " for name in COMPONENTS:\n",
1198
+ " src = f\"{SRC_DIR}/{name}.tflite\"\n",
1199
+ " print(f\"\\n=== {name} ===\")\n",
1200
+ "\n",
1201
+ " qt = aie_quantizer.Quantizer(src)\n",
1202
+ " qt.update_quantization_recipe(\n",
1203
+ " regex=\".*\",\n",
1204
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1205
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1206
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1207
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1208
+ " num_bits=4,\n",
1209
+ " symmetric=True,\n",
1210
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1211
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1212
+ " ),\n",
1213
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1214
+ " explicit_dequantize=False,\n",
1215
+ " ),\n",
1216
+ " )\n",
1217
+ " result = qt.quantize()\n",
1218
+ " out_path = f\"{TEST_DIR}/{name}_int4.tflite\"\n",
1219
+ " with open(out_path, \"wb\") as f:\n",
1220
+ " f.write(result.quantized_model)\n",
1221
+ "\n",
1222
+ " orig_mb = os.path.getsize(src) / 1024 / 1024\n",
1223
+ " quant_mb = os.path.getsize(out_path) / 1024 / 1024\n",
1224
+ " print(f\" Size: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
1225
+ "\n",
1226
+ " aot_result = aot_lib.aot_compile(\n",
1227
+ " out_path,\n",
1228
+ " target=[sm8850_target],\n",
1229
+ " keep_going=True,\n",
1230
+ " )\n",
1231
+ " print(f\" AOT: Success={len(aot_result.models_with_backend)}, Failed={len(aot_result.failed_backends)}\")\n",
1232
+ " for backend, error in aot_result.failed_backends:\n",
1233
+ " print(f\" Error: {error[:300]}\")"
1234
+ ]
1235
+ },
1236
+ {
1237
+ "cell_type": "code",
1238
+ "execution_count": 10,
1239
+ "id": "69c9286b-c246-438b-ac94-d5d7774de151",
1240
+ "metadata": {},
1241
+ "outputs": [
1242
+ {
1243
+ "name": "stdout",
1244
+ "output_type": "stream",
1245
+ "text": [
1246
+ "Quantize: 469.6 MB -> 469.6 MB (100%)\n"
1247
+ ]
1248
+ },
1249
+ {
1250
+ "data": {
1251
+ "application/vnd.jupyter.widget-view+json": {
1252
+ "model_id": "7f78061d12674ad286882e3856f4e638",
1253
+ "version_major": 2,
1254
+ "version_minor": 0
1255
+ },
1256
+ "text/plain": [
1257
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
1258
+ ]
1259
+ },
1260
+ "metadata": {},
1261
+ "output_type": "display_data"
1262
+ },
1263
+ {
1264
+ "name": "stdout",
1265
+ "output_type": "stream",
1266
+ "text": [
1267
+ "AOT: Success=1, Failed=0\n"
1268
+ ]
1269
+ }
1270
+ ],
1271
+ "source": [
1272
+ " import os\n",
1273
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
1274
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
1275
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
1276
+ "\n",
1277
+ " SRC = \"/workspace/sdxl_tflite/clip.tflite\"\n",
1278
+ " TEST_DIR = \"/tmp/sdxl_int8_blockwise_test\"\n",
1279
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
1280
+ "\n",
1281
+ " # INT8 BLOCKWISE_32\n",
1282
+ " qt = aie_quantizer.Quantizer(SRC)\n",
1283
+ " qt.update_quantization_recipe(\n",
1284
+ " regex=\".*\",\n",
1285
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1286
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1287
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1288
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1289
+ " num_bits=8,\n",
1290
+ " symmetric=True,\n",
1291
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1292
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1293
+ " ),\n",
1294
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1295
+ " explicit_dequantize=False,\n",
1296
+ " ),\n",
1297
+ " )\n",
1298
+ " result = qt.quantize()\n",
1299
+ " out_path = f\"{TEST_DIR}/clip_int8_bw32.tflite\"\n",
1300
+ " with open(out_path, \"wb\") as f:\n",
1301
+ " f.write(result.quantized_model)\n",
1302
+ "\n",
1303
+ " orig_mb = os.path.getsize(SRC) / 1024 / 1024\n",
1304
+ " quant_mb = os.path.getsize(out_path) / 1024 / 1024\n",
1305
+ " print(f\"Quantize: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
1306
+ "\n",
1307
+ " # AOT\n",
1308
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
1309
+ " aot_result = aot_lib.aot_compile(\n",
1310
+ " out_path,\n",
1311
+ " target=[sm8850_target],\n",
1312
+ " keep_going=True,\n",
1313
+ " )\n",
1314
+ " print(f\"AOT: Success={len(aot_result.models_with_backend)}, Failed={len(aot_result.failed_backends)}\")\n",
1315
+ " for backend, error in aot_result.failed_backends:\n",
1316
+ " print(f\"Error: {error[:300]}\")"
1317
+ ]
1318
+ },
1319
+ {
1320
+ "cell_type": "code",
1321
+ "execution_count": null,
1322
+ "id": "34754c5d-7180-4f3a-9171-de27988a28d9",
1323
+ "metadata": {},
1324
+ "outputs": [
1325
+ {
1326
+ "name": "stderr",
1327
+ "output_type": "stream",
1328
+ "text": [
1329
+ "/usr/local/lib/python3.12/dist-packages/tensorflow/lite/python/interpreter.py:457: UserWarning: Warning: tf.lite.Interpreter is deprecated and is scheduled for deletion in\n",
1330
+ " TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.\n",
1331
+ " See the [migration guide](https://ai.google.dev/edge/litert/migration)\n",
1332
+ " for details.\n",
1333
+ " \n",
1334
+ " warnings.warn(_INTERPRETER_DELETION_WARNING)\n"
1335
+ ]
1336
+ }
1337
+ ],
1338
+ "source": [
1339
+ " # 양자화 전후 모델 내부 확인\n",
1340
+ " import tensorflow as tf\n",
1341
+ "\n",
1342
+ " orig = tf.lite.Interpreter(model_path=\"/workspace/sdxl_tflite/clip.tflite\")\n",
1343
+ " orig.allocate_tensors()\n",
1344
+ "\n",
1345
+ " quant = tf.lite.Interpreter(model_path=\"/tmp/sdxl_int8_blockwise_test/clip_int8_bw32.tflite\")\n",
1346
+ " quant.allocate_tensors()\n",
1347
+ "\n",
1348
+ " # 가중치 텐서 dtype 비교\n",
1349
+ " print(\"=== Original ===\")\n",
1350
+ " for t in orig.get_tensor_details()[:5]:\n",
1351
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")\n",
1352
+ "\n",
1353
+ " print(\"\\n=== INT8 blockwise ===\")\n",
1354
+ " for t in quant.get_tensor_details()[:5]:\n",
1355
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")\n",
1356
+ "\n",
1357
+ " # INT4 것도 비교\n",
1358
+ " int4 = tf.lite.Interpreter(model_path=\"/tmp/sdxl_int4_test/clip_int4.tflite\")\n",
1359
+ " int4.allocate_tensors()\n",
1360
+ " print(\"\\n=== INT4 blockwise ===\")\n",
1361
+ " for t in int4.get_tensor_details()[:5]:\n",
1362
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")"
1363
+ ]
1364
+ },
1365
+ {
1366
+ "cell_type": "code",
1367
+ "execution_count": 2,
1368
+ "id": "98db8f3d-1099-468a-87b2-e7f60431b948",
1369
+ "metadata": {},
1370
+ "outputs": [
1371
+ {
1372
+ "name": "stdout",
1373
+ "output_type": "stream",
1374
+ "text": [
1375
+ "Original: 469.6 MB\n",
1376
+ "Quantized bytes: 469.6 MB\n"
1377
+ ]
1378
+ },
1379
+ {
1380
+ "name": "stderr",
1381
+ "output_type": "stream",
1382
+ "text": [
1383
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
1384
+ " ret = np.divide(tensor_data, scales)\n",
1385
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
1386
+ " return tensor.astype(qtype)\n"
1387
+ ]
1388
+ },
1389
+ {
1390
+ "name": "stdout",
1391
+ "output_type": "stream",
1392
+ "text": [
1393
+ "INT4 quantized bytes: 66.8 MB\n"
1394
+ ]
1395
+ }
1396
+ ],
1397
+ "source": [
1398
+ " import os\n",
1399
+ "\n",
1400
+ " # 양자화만 다시 해서 확인 (AOT 안 거침)\n",
1401
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
1402
+ "\n",
1403
+ " SRC = \"/workspace/sdxl_tflite/clip.tflite\"\n",
1404
+ "\n",
1405
+ " # INT8 blockwise 양자화만\n",
1406
+ " qt = aie_quantizer.Quantizer(SRC)\n",
1407
+ " qt.update_quantization_recipe(\n",
1408
+ " regex=\".*\",\n",
1409
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1410
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1411
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1412
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1413
+ " num_bits=8,\n",
1414
+ " symmetric=True,\n",
1415
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1416
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1417
+ " ),\n",
1418
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1419
+ " explicit_dequantize=False,\n",
1420
+ " ),\n",
1421
+ " )\n",
1422
+ " result = qt.quantize()\n",
1423
+ "\n",
1424
+ " # 양자화 결과 통계 확인\n",
1425
+ " print(f\"Original: {os.path.getsize(SRC) / 1024 / 1024:.1f} MB\")\n",
1426
+ " print(f\"Quantized bytes: {len(result.quantized_model) / 1024 / 1024:.1f} MB\")\n",
1427
+ "\n",
1428
+ " # 양자화 로그 확인\n",
1429
+ " if hasattr(result, 'log'):\n",
1430
+ " print(f\"Log: {result.log}\")\n",
1431
+ " if hasattr(result, 'skipped_ops'):\n",
1432
+ " print(f\"Skipped: {result.skipped_ops}\")\n",
1433
+ "\n",
1434
+ " # 비교: INT4도 같은 방식으로 크기 확인\n",
1435
+ " qt4 = aie_quantizer.Quantizer(SRC)\n",
1436
+ " qt4.update_quantization_recipe(\n",
1437
+ " regex=\".*\",\n",
1438
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1439
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1440
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1441
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1442
+ " num_bits=4,\n",
1443
+ " symmetric=True,\n",
1444
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1445
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1446
+ " ),\n",
1447
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1448
+ " explicit_dequantize=False,\n",
1449
+ " ),\n",
1450
+ " )\n",
1451
+ " result4 = qt4.quantize()\n",
1452
+ " print(f\"INT4 quantized bytes: {len(result4.quantized_model) / 1024 / 1024:.1f} MB\")\n"
1453
+ ]
1454
+ },
1455
+ {
1456
+ "cell_type": "code",
1457
+ "execution_count": null,
1458
+ "id": "98d994de-58ff-4741-9521-f8d0823ff089",
1459
+ "metadata": {},
1460
+ "outputs": [],
1461
+ "source": []
1462
+ }
1463
+ ],
1464
+ "metadata": {
1465
+ "kernelspec": {
1466
+ "display_name": "Python 3 (ipykernel)",
1467
+ "language": "python",
1468
+ "name": "python3"
1469
+ },
1470
+ "language_info": {
1471
+ "codemirror_mode": {
1472
+ "name": "ipython",
1473
+ "version": 3
1474
+ },
1475
+ "file_extension": ".py",
1476
+ "mimetype": "text/x-python",
1477
+ "name": "python",
1478
+ "nbconvert_exporter": "python",
1479
+ "pygments_lexer": "ipython3",
1480
+ "version": "3.12.3"
1481
+ }
1482
+ },
1483
+ "nbformat": 4,
1484
+ "nbformat_minor": 5
1485
+ }
sdxl_porting_example.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
sdxl_tflite/cfg_15.0.png ADDED

Git LFS Details

  • SHA256: 86b78e3b8c7a22cc5b7866f47fdfa5ad24471a1b1a3cbf0f275f1d820ca0af3e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.91 MB
sdxl_tflite/cfg_2.0.png ADDED

Git LFS Details

  • SHA256: d16397bb98187cbe3be3a5a99bdc1f97a67e8902a3819aead090cabb8b260328
  • Pointer size: 132 Bytes
  • Size of remote file: 1.89 MB
sdxl_tflite/cfg_7.5.png ADDED

Git LFS Details

  • SHA256: aecf3d3f0fef0bdbe3d83b68f680ff9da24f1e59285b512e2f52364d5177de03
  • Pointer size: 132 Bytes
  • Size of remote file: 1.85 MB
sdxl_tflite/cfg_comparison.png ADDED

Git LFS Details

  • SHA256: 9780b2188638568b5dd096675b1aaa7c0953f5cd41eb5ca459d879d1587c2059
  • Pointer size: 132 Bytes
  • Size of remote file: 1.2 MB
sdxl_tflite/clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3573a0b57173b7753267a50a720dea04b7de6993ee3b279ce9d7935b2d603e3
3
+ size 492412148
sdxl_tflite/decoder.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20134f17c83dfb234a1121ffb30490aaaf4e199286da2e09673fe15a9a0a98ed
3
+ size 198431096
sdxl_tflite/generated_image.jpg ADDED

Git LFS Details

  • SHA256: 3ebf5caec793ef5d919f781e80fc874cc9ff4e36b08783fd3af73e4e1451939f
  • Pointer size: 131 Bytes
  • Size of remote file: 114 kB
sdxl_tflite/playground_00.png ADDED

Git LFS Details

  • SHA256: 6e30464558bc46d5970fc93d9c563f053d34aa01c239157bd4ebfffd226194ba
  • Pointer size: 132 Bytes
  • Size of remote file: 1.6 MB
sdxl_tflite/playground_01.png ADDED

Git LFS Details

  • SHA256: df7aedf498b10d2543f0ca59d484fa0fc720781f6a3abcd0f215467adc5434f6
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
sdxl_tflite/playground_02.png ADDED

Git LFS Details

  • SHA256: e526d321dab805b09554517f0d6777b6f0dc5c44e8a2a03df94aff29eb750f80
  • Pointer size: 132 Bytes
  • Size of remote file: 1.83 MB
sdxl_tflite/playground_03.png ADDED

Git LFS Details

  • SHA256: 0665b7413e48d08fc7b0edd1707cabe3e6bf200a6e7b1f0b7fe358b2dd714d12
  • Pointer size: 132 Bytes
  • Size of remote file: 1.93 MB
sdxl_tflite/playground_grid.png ADDED

Git LFS Details

  • SHA256: ed54dbbec18f8915a09b38c89e0ecade1422590f93701e7bd87a528935c8fa04
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
sdxl_tflite/sampler_comparison.png ADDED

Git LFS Details

  • SHA256: 617fb5177d33dee2314384f34034c4f0bf60f2b270ffbc9da219905796284f17
  • Pointer size: 132 Bytes
  • Size of remote file: 1.46 MB
sdxl_tflite/sampler_k_euler.png ADDED

Git LFS Details

  • SHA256: fe88101a37201f3e3bdd595f576b66cfe48ce209b1e996a5456f6e6cc66c5e1f
  • Pointer size: 132 Bytes
  • Size of remote file: 2.23 MB
sdxl_tflite/sampler_k_euler_ancestral.png ADDED

Git LFS Details

  • SHA256: e05ba0f1eb2aa7161deba099c18c3d3269ba98b5ea8840d5b41e7797e41172d9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.02 MB
sdxl_tflite/sampler_k_lms.png ADDED

Git LFS Details

  • SHA256: 0766438f2eeae2d588ad65023359f331fdd24891249d5d54e875aeaff2e6dfbb
  • Pointer size: 132 Bytes
  • Size of remote file: 2.16 MB
sdxl_tflite/text_projection.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c2977d1573888208415c96531f65558cdeb76c9a5e1bcc8a3d1df6a55022f8
3
+ size 6553728
sdxl_tflite_aot_int4_blockwise_32/_compiled_models/diffusion_int4_Qualcomm_SM8850_apply_plugin.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c467a24fc1ae7aa397e74f0480ba981bdebe4b94b5808cb6a220bc350816acf
3
+ size 2587049280
sdxl_tflite_fp16/_compiled_models/diffusion_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_fp16/clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57feae823b13c5040150bcd8fd3f73285462cdc0892106991aecf44cecbe084a
3
+ size 246676112
sdxl_tflite_fp16/decoder.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280cd22872a0f36416e5690e8a546fb91b33992e77fc6b65da0c5f8c4f11a57e
3
+ size 99544864
sdxl_tflite_fp16/diffusion.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5909a6b20f694751dfd05249327790892785aed03b653918bf28320557c2ad78
3
+ size 5144262816
sdxl_tflite_fp16/open_clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e459f77df8a0a8cc366a0e9b7cf00c3fa0ad3e6acb6ca91bd847c4cd2a3627
3
+ size 1387438216
sdxl_tflite_fp16/text_projection.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce2d9bc5c2b839e18fc4ab2a2f70e4f7840c9b6ff646f0881b0cd15e04c8c48d
3
+ size 6553600
sdxl_tflite_fp16/text_projection.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c2977d1573888208415c96531f65558cdeb76c9a5e1bcc8a3d1df6a55022f8
3
+ size 6553728
sdxl_tflite_quantized/_compiled_models/clip_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_quantized/_compiled_models/decoder_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_quantized/_compiled_models/diffusion_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_quantized/_compiled_models/open_clip_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_quantized/clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2eb43f4edab9ca36792980b6a466ea5f1810e0fa15cdc301e930a19b65f4f3c
3
+ size 125385272
sdxl_tflite_quantized/decoder.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c953730ea9d3733664f63832dbff9c2ab782c7d7dc1362d66a2700bcf4fea9
3
+ size 50269912
sdxl_tflite_quantized/diffusion.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0acb90bca9400caa200d5d11aa557ff05ccf07d26825d2c4e66daf11b2bec931
3
+ size 2596327472
sdxl_tflite_quantized/open_clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d28b58774329f31487b76aee6bfde7870e5cf5d7b3111388649ff9d03444752
3
+ size 700069088