IHaBiS commited on
Commit
fdd97e4
·
verified ·
1 Parent(s): 40f7209

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +14 -0
  2. .ipynb_checkpoints/Untitled-checkpoint.ipynb +6 -0
  3. .ipynb_checkpoints/sdxl_int4_aot_quantization-checkpoint.ipynb +771 -0
  4. .ipynb_checkpoints/sdxl_porting_example-checkpoint.ipynb +0 -0
  5. Untitled.ipynb +159 -0
  6. sdxl_ai_pack/device_targeting_configuration.xml +11 -0
  7. sdxl_ai_pack/sdxl_clip_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt +0 -0
  8. sdxl_ai_pack/sdxl_clip_mtk/src/main/assets/model#group_other/placeholder.txt +0 -0
  9. sdxl_ai_pack/sdxl_decoder_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt +0 -0
  10. sdxl_ai_pack/sdxl_decoder_mtk/src/main/assets/model#group_other/placeholder.txt +0 -0
  11. sdxl_ai_pack/sdxl_diffusion/src/main/assets/model#group_Qualcomm_SM8850/diffusion.tflite +3 -0
  12. sdxl_ai_pack/sdxl_diffusion_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt +0 -0
  13. sdxl_ai_pack/sdxl_diffusion_mtk/src/main/assets/model#group_other/placeholder.txt +0 -0
  14. sdxl_ai_pack/sdxl_open_clip_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt +0 -0
  15. sdxl_ai_pack/sdxl_open_clip_mtk/src/main/assets/model#group_other/placeholder.txt +0 -0
  16. sdxl_int4_aot_quantization.ipynb +1485 -0
  17. sdxl_porting_example.ipynb +0 -0
  18. sdxl_tflite/cfg_15.0.png +3 -0
  19. sdxl_tflite/cfg_2.0.png +3 -0
  20. sdxl_tflite/cfg_7.5.png +3 -0
  21. sdxl_tflite/cfg_comparison.png +3 -0
  22. sdxl_tflite/clip.tflite +3 -0
  23. sdxl_tflite/decoder.tflite +3 -0
  24. sdxl_tflite/generated_image.jpg +3 -0
  25. sdxl_tflite/playground_00.png +3 -0
  26. sdxl_tflite/playground_01.png +3 -0
  27. sdxl_tflite/playground_02.png +3 -0
  28. sdxl_tflite/playground_03.png +3 -0
  29. sdxl_tflite/playground_grid.png +3 -0
  30. sdxl_tflite/sampler_comparison.png +3 -0
  31. sdxl_tflite/sampler_k_euler.png +3 -0
  32. sdxl_tflite/sampler_k_euler_ancestral.png +3 -0
  33. sdxl_tflite/sampler_k_lms.png +3 -0
  34. sdxl_tflite/text_projection.npy +3 -0
  35. sdxl_tflite_aot_int4_blockwise_32/_compiled_models/diffusion_int4_Qualcomm_SM8850_apply_plugin.tflite +3 -0
  36. sdxl_tflite_fp16/_compiled_models/diffusion_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  37. sdxl_tflite_fp16/clip.tflite +3 -0
  38. sdxl_tflite_fp16/decoder.tflite +3 -0
  39. sdxl_tflite_fp16/diffusion.tflite +3 -0
  40. sdxl_tflite_fp16/open_clip.tflite +3 -0
  41. sdxl_tflite_fp16/text_projection.bin +3 -0
  42. sdxl_tflite_fp16/text_projection.npy +3 -0
  43. sdxl_tflite_quantized/_compiled_models/clip_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  44. sdxl_tflite_quantized/_compiled_models/decoder_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  45. sdxl_tflite_quantized/_compiled_models/diffusion_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  46. sdxl_tflite_quantized/_compiled_models/open_clip_Qualcomm_SM8850_apply_plugin.tflite +0 -0
  47. sdxl_tflite_quantized/clip.tflite +3 -0
  48. sdxl_tflite_quantized/decoder.tflite +3 -0
  49. sdxl_tflite_quantized/diffusion.tflite +3 -0
  50. sdxl_tflite_quantized/open_clip.tflite +3 -0
.gitattributes CHANGED
@@ -33,3 +33,17 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ sdxl_tflite/generated_image.jpg filter=lfs diff=lfs merge=lfs -text
37
+ sdxl_tflite/playground_00.png filter=lfs diff=lfs merge=lfs -text
38
+ sdxl_tflite/playground_01.png filter=lfs diff=lfs merge=lfs -text
39
+ sdxl_tflite/playground_02.png filter=lfs diff=lfs merge=lfs -text
40
+ sdxl_tflite/playground_03.png filter=lfs diff=lfs merge=lfs -text
41
+ sdxl_tflite/playground_grid.png filter=lfs diff=lfs merge=lfs -text
42
+ sdxl_tflite/cfg_2.0.png filter=lfs diff=lfs merge=lfs -text
43
+ sdxl_tflite/cfg_7.5.png filter=lfs diff=lfs merge=lfs -text
44
+ sdxl_tflite/cfg_15.0.png filter=lfs diff=lfs merge=lfs -text
45
+ sdxl_tflite/cfg_comparison.png filter=lfs diff=lfs merge=lfs -text
46
+ sdxl_tflite/sampler_k_euler.png filter=lfs diff=lfs merge=lfs -text
47
+ sdxl_tflite/sampler_k_euler_ancestral.png filter=lfs diff=lfs merge=lfs -text
48
+ sdxl_tflite/sampler_k_lms.png filter=lfs diff=lfs merge=lfs -text
49
+ sdxl_tflite/sampler_comparison.png filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
.ipynb_checkpoints/sdxl_int4_aot_quantization-checkpoint.ipynb ADDED
@@ -0,0 +1,771 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "30781904-bd35-473d-9016-1f33ed9f036f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "\n",
14
+ "==================================================\n",
15
+ "[clip] Quantizing...\n"
16
+ ]
17
+ },
18
+ {
19
+ "name": "stderr",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
23
+ " ret = np.divide(tensor_data, scales)\n",
24
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
25
+ " return tensor.astype(qtype)\n"
26
+ ]
27
+ },
28
+ {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "[clip] Size: 469.6 MB -> 66.8 MB (14%)\n",
33
+ "[clip] AOT compiling for SM8850...\n"
34
+ ]
35
+ },
36
+ {
37
+ "data": {
38
+ "application/vnd.jupyter.widget-view+json": {
39
+ "model_id": "d5ab30865c774458b49fb8b7586e314f",
40
+ "version_major": 2,
41
+ "version_minor": 0
42
+ },
43
+ "text/plain": [
44
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
45
+ ]
46
+ },
47
+ "metadata": {},
48
+ "output_type": "display_data"
49
+ },
50
+ {
51
+ "name": "stdout",
52
+ "output_type": "stream",
53
+ "text": [
54
+ "[clip] AOT: Success=1, Failed=0\n",
55
+ "\n",
56
+ "==================================================\n",
57
+ "[open_clip] Quantizing...\n"
58
+ ]
59
+ },
60
+ {
61
+ "name": "stderr",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: divide by zero encountered in divide\n",
65
+ " ret = np.divide(tensor_data, scales)\n"
66
+ ]
67
+ },
68
+ {
69
+ "name": "stdout",
70
+ "output_type": "stream",
71
+ "text": [
72
+ "[open_clip] Size: 2643.7 MB -> 374.0 MB (14%)\n",
73
+ "[open_clip] AOT compiling for SM8850...\n"
74
+ ]
75
+ },
76
+ {
77
+ "data": {
78
+ "application/vnd.jupyter.widget-view+json": {
79
+ "model_id": "0f7cde62157f4eb48338fedab0d8747a",
80
+ "version_major": 2,
81
+ "version_minor": 0
82
+ },
83
+ "text/plain": [
84
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
85
+ ]
86
+ },
87
+ "metadata": {},
88
+ "output_type": "display_data"
89
+ },
90
+ {
91
+ "name": "stdout",
92
+ "output_type": "stream",
93
+ "text": [
94
+ "[open_clip] AOT: Success=1, Failed=0\n",
95
+ "\n",
96
+ "==================================================\n",
97
+ "[diffusion] Quantizing...\n",
98
+ "[diffusion] Size: 9799.6 MB -> 2480.8 MB (25%)\n",
99
+ "[diffusion] AOT compiling for SM8850...\n"
100
+ ]
101
+ },
102
+ {
103
+ "data": {
104
+ "application/vnd.jupyter.widget-view+json": {
105
+ "model_id": "28a7fba90a074601b1834938352c8193",
106
+ "version_major": 2,
107
+ "version_minor": 0
108
+ },
109
+ "text/plain": [
110
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
111
+ ]
112
+ },
113
+ "metadata": {},
114
+ "output_type": "display_data"
115
+ }
116
+ ],
117
+ "source": [
118
+ " import os\n",
119
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
120
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
121
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
122
+ "\n",
123
+ " SRC_DIR = \"/workspace/sdxl_tflite\"\n",
124
+ " OUT_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
125
+ " os.makedirs(OUT_DIR, exist_ok=True)\n",
126
+ "\n",
127
+ " COMPONENTS = [\"clip\", \"open_clip\", \"diffusion\", \"decoder\"]\n",
128
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
129
+ "\n",
130
+ " int4_config = aie_quantizer.qtyping.OpQuantizationConfig(\n",
131
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
132
+ " num_bits=4,\n",
133
+ " symmetric=True,\n",
134
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
135
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
136
+ " ),\n",
137
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
138
+ " explicit_dequantize=False,\n",
139
+ " )\n",
140
+ "\n",
141
+ " results = {}\n",
142
+ "\n",
143
+ " for name in COMPONENTS:\n",
144
+ " src = f\"{SRC_DIR}/{name}.tflite\"\n",
145
+ " int4_path = f\"{OUT_DIR}/{name}_int4.tflite\"\n",
146
+ " print(f\"\\n{'='*50}\")\n",
147
+ " print(f\"[{name}] Quantizing...\")\n",
148
+ "\n",
149
+ " qt = aie_quantizer.Quantizer(src)\n",
150
+ " qt.update_quantization_recipe(\n",
151
+ " regex=\".*\",\n",
152
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
153
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
154
+ " op_config=int4_config,\n",
155
+ " )\n",
156
+ " quant_result = qt.quantize()\n",
157
+ " with open(int4_path, \"wb\") as f:\n",
158
+ " f.write(quant_result.quantized_model)\n",
159
+ "\n",
160
+ " orig_mb = os.path.getsize(src) / 1024 / 1024\n",
161
+ " quant_mb = os.path.getsize(int4_path) / 1024 / 1024\n",
162
+ " print(f\"[{name}] Size: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
163
+ "\n",
164
+ " print(f\"[{name}] AOT compiling for SM8850...\")\n",
165
+ " aot_result = aot_lib.aot_compile(\n",
166
+ " int4_path,\n",
167
+ " output_dir=OUT_DIR,\n",
168
+ " target=[sm8850_target],\n",
169
+ " keep_going=True,\n",
170
+ " )\n",
171
+ "\n",
172
+ " success = len(aot_result.models_with_backend)\n",
173
+ " failed = len(aot_result.failed_backends)\n",
174
+ " results[name] = {\"success\": success, \"failed\": failed}\n",
175
+ " print(f\"[{name}] AOT: Success={success}, Failed={failed}\")\n",
176
+ " for backend, error in aot_result.failed_backends:\n",
177
+ " print(f\"[{name}] Error: {error[:300]}\")\n",
178
+ "\n",
179
+ " print(f\"\\n{'='*50}\")\n",
180
+ " print(\"Summary:\")\n",
181
+ " for name, r in results.items():\n",
182
+ " status = \"OK\" if r[\"success\"] > 0 else \"FAIL\"\n",
183
+ " print(f\" {name}: {status}\")\n",
184
+ " print(f\"\\nOutput: {OUT_DIR}\")\n",
185
+ " print(f\"Files: {os.listdir(OUT_DIR)}\")"
186
+ ]
187
+ },
188
+ {
189
+ "cell_type": "markdown",
190
+ "id": "3936e5c5-ceeb-4cc3-819c-702a38a89bf0",
191
+ "metadata": {},
192
+ "source": [
193
+ "# AOT Convertion Code\n",
194
+ "\n",
195
+ "위는 완성된 코드, 아래는 테스트용 잡다한거"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": 2,
201
+ "id": "a87c3a83-7811-465e-9079-bced67cbb82e",
202
+ "metadata": {},
203
+ "outputs": [
204
+ {
205
+ "name": "stdout",
206
+ "output_type": "stream",
207
+ "text": [
208
+ "['Quantizer', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'algorithm_manager', 'algorithm_manager_api', 'algorithms', 'calibrator', 'default_policy', 'model_modifier', 'model_validator', 'params_generator', 'qtyping', 'quantizer', 'recipe', 'recipe_manager', 'transformation_instruction_generator', 'transformation_performer', 'transformations', 'utils']\n",
209
+ "['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__']\n"
210
+ ]
211
+ }
212
+ ],
213
+ "source": [
214
+ " # ai_edge_quantizer에 AieQuantizerT 구현체가 있는지 확인\n",
215
+ " import ai_edge_quantizer\n",
216
+ " print(dir(ai_edge_quantizer))\n",
217
+ "\n",
218
+ " # 혹시 ai_edge_litert.aot 쪽에 있는지도 확인\n",
219
+ " import ai_edge_litert.aot\n",
220
+ " print(dir(ai_edge_litert.aot))\n",
221
+ "\n",
222
+ " # components에서 import 가능한 quantizer 구현체 확인\n",
223
+ " from ai_edge_litert.aot.core import components\n",
224
+ " import inspect\n",
225
+ " for name, obj in inspect.getmembers(components):\n",
226
+ " if inspect.isclass(obj) and issubclass(obj, components.AieQuantizerT) and obj != components.AieQuantizerT:\n",
227
+ " print(f\"Found: {name}\")"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": 4,
233
+ "id": "b7f7c046-e555-417d-b877-60cf9845d2dc",
234
+ "metadata": {},
235
+ "outputs": [
236
+ {
237
+ "name": "stderr",
238
+ "output_type": "stream",
239
+ "text": [
240
+ "/usr/local/lib/python3.12/dist-packages/torch/distributed/distributed_c10d.py:351: UserWarning: Device capability of jax unspecified, assuming `cpu` and `cuda`. Please specify it via the `devices` argument of `register_backend`.\n",
241
+ " warnings.warn(\n",
242
+ "An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n",
243
+ "Skipping import of cpp extensions due to incompatible torch version 2.8.0+cu128 for torchao version 0.16.0 Please see https://github.com/pytorch/ao/issues/2919 for more info\n"
244
+ ]
245
+ },
246
+ {
247
+ "name": "stdout",
248
+ "output_type": "stream",
249
+ "text": [
250
+ "INT4 BLOCKWISE_32 verify passed: QuantConfig(pt2e_quantizer=None, generative_recipe=GenerativeQuantRecipe(\n",
251
+ " Default: (a:FP32, w:INT4, DYNAMIC_RANGE, MIN_MAX, BLOCKWISE_32, )\n",
252
+ " Embedding: None\n",
253
+ " Attention: None\n",
254
+ " Feedforward: None\n",
255
+ "), _quantizer_mode=<_QuantizerMode.AI_EDGE_QUANTIZER: 4>)\n"
256
+ ]
257
+ }
258
+ ],
259
+ "source": [
260
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
261
+ "\n",
262
+ " # INT4 BLOCKWISE_32 (DYNAMIC_RANGE 모드)\n",
263
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
264
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
265
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
266
+ " )\n",
267
+ " print(\"INT4 BLOCKWISE_32 verify passed:\", quant_config)\n"
268
+ ]
269
+ },
270
+ {
271
+ "cell_type": "code",
272
+ "execution_count": 5,
273
+ "id": "e8c3a18d-09fa-4c3f-8e92-83937f9a97de",
274
+ "metadata": {},
275
+ "outputs": [
276
+ {
277
+ "name": "stdout",
278
+ "output_type": "stream",
279
+ "text": [
280
+ "QuantConfig(pt2e_quantizer=None, generative_recipe=GenerativeQuantRecipe(\n",
281
+ " Default: (a:FP32, w:INT4, DYNAMIC_RANGE, MIN_MAX, BLOCKWISE_32, )\n",
282
+ " Embedding: None\n",
283
+ " Attention: None\n",
284
+ " Feedforward: None\n",
285
+ "), _quantizer_mode=<_QuantizerMode.AI_EDGE_QUANTIZER: 4>)\n"
286
+ ]
287
+ }
288
+ ],
289
+ "source": [
290
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
291
+ "\n",
292
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
293
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
294
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
295
+ " )\n",
296
+ " print(quant_config)"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 6,
302
+ "id": "4619e935-a84e-4c01-ac96-ede6988f7e52",
303
+ "metadata": {},
304
+ "outputs": [
305
+ {
306
+ "ename": "NameError",
307
+ "evalue": "name 'clip_loaded' is not defined",
308
+ "output_type": "error",
309
+ "traceback": [
310
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
311
+ "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
312
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 19\u001b[39m\n\u001b[32m 14\u001b[39m os.makedirs(TEST_DIR, exist_ok=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m 16\u001b[39m prompt_tokens = torch.full((\u001b[32m1\u001b[39m, \u001b[32m77\u001b[39m), \u001b[32m0\u001b[39m, dtype=torch.int) \u001b[38;5;66;03m# N_TOKENS=77\u001b[39;00m\n\u001b[32m 18\u001b[39m litert_torch.signature(\n\u001b[32m---> \u001b[39m\u001b[32m19\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mencode\u001b[39m\u001b[33m\"\u001b[39m, \u001b[43mclip_loaded\u001b[49m, (prompt_tokens,)\n\u001b[32m 20\u001b[39m ).convert(quant_config=quant_config).export(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mTEST_DIR\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/clip_int4.tflite\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 21\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mINT4 clip conversion done\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 23\u001b[39m \u001b[38;5;66;03m# AOT 컴파일 테스트\u001b[39;00m\n",
313
+ "\u001b[31mNameError\u001b[39m: name 'clip_loaded' is not defined"
314
+ ]
315
+ }
316
+ ],
317
+ "source": [
318
+ " import os\n",
319
+ " import torch\n",
320
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
321
+ " import litert_torch\n",
322
+ "\n",
323
+ " # INT4 BLOCKWISE_32 config\n",
324
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
325
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
326
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
327
+ " )\n",
328
+ "\n",
329
+ " # clip 모델만 변환 (가장 가벼움)\n",
330
+ " TEST_DIR = \"/tmp/sdxl_int4_test\"\n",
331
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
332
+ "\n",
333
+ " prompt_tokens = torch.full((1, 77), 0, dtype=torch.int) # N_TOKENS=77\n",
334
+ "\n",
335
+ " litert_torch.signature(\n",
336
+ " \"encode\", clip_loaded, (prompt_tokens,)\n",
337
+ " ).convert(quant_config=quant_config).export(f\"{TEST_DIR}/clip_int4.tflite\")\n",
338
+ " print(\"INT4 clip conversion done\")\n",
339
+ "\n",
340
+ " # AOT 컴파일 테스트\n",
341
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
342
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
343
+ "\n",
344
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
345
+ " result = aot_lib.aot_compile(\n",
346
+ " f\"{TEST_DIR}/clip_int4.tflite\",\n",
347
+ " target=[sm8850_target],\n",
348
+ " keep_going=True,\n",
349
+ " )\n",
350
+ "\n",
351
+ " print(f\"Success: {len(result.models_with_backend)}\")\n",
352
+ " print(f\"Failed: {len(result.failed_backends)}\")\n",
353
+ " for backend, error in result.failed_backends:\n",
354
+ " print(f\" {backend.target}: {error[:200]}\")"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "code",
359
+ "execution_count": 7,
360
+ "id": "429c16cb-c8bf-4d66-ae08-e7fb0b35d0b3",
361
+ "metadata": {},
362
+ "outputs": [
363
+ {
364
+ "name": "stdout",
365
+ "output_type": "stream",
366
+ "text": [
367
+ "/tmp/sdxl_tflite: ['clip.tflite', 'open_clip.tflite', 'diffusion.tflite', 'decoder.tflite', 'text_projection.npy', 'generated_image.jpg', 'playground_00.png', 'playground_01.png', 'playground_02.png', 'playground_03.png', 'playground_grid.png', 'cfg_2.0.png', 'cfg_7.5.png', 'cfg_15.0.png', 'cfg_comparison.png', 'sampler_k_euler.png', 'sampler_k_euler_ancestral.png', 'sampler_k_lms.png', 'sampler_comparison.png']\n",
368
+ "/workspace/sdxl_tflite: ['open_clip.tflite', 'diffusion.tflite', 'decoder.tflite', 'text_projection.npy', 'generated_image.jpg', 'playground_00.png', 'playground_01.png', 'playground_02.png', 'playground_03.png', 'playground_grid.png', 'cfg_2.0.png', 'cfg_7.5.png', 'cfg_15.0.png', 'cfg_comparison.png', 'sampler_k_euler.png', 'sampler_k_euler_ancestral.png', 'sampler_k_lms.png', 'sampler_comparison.png', 'clip.tflite', '.ipynb_checkpoints']\n"
369
+ ]
370
+ }
371
+ ],
372
+ "source": [
373
+ " import os\n",
374
+ " # 이전 변환 결과가 남아있는지 확인\n",
375
+ " for d in [\"/tmp/sdxl_tflite\", \"/tmp/sdxl_tflite_quantized\", \"/workspace/sdxl_tflite\"]:\n",
376
+ " if os.path.exists(d):\n",
377
+ " files = os.listdir(d)\n",
378
+ " print(f\"{d}: {files}\")"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": 9,
384
+ "id": "70087fe5-d3e8-4bf2-ba1c-92c128d88a0c",
385
+ "metadata": {},
386
+ "outputs": [
387
+ {
388
+ "name": "stderr",
389
+ "output_type": "stream",
390
+ "text": [
391
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
392
+ " ret = np.divide(tensor_data, scales)\n",
393
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
394
+ " return tensor.astype(qtype)\n"
395
+ ]
396
+ },
397
+ {
398
+ "name": "stdout",
399
+ "output_type": "stream",
400
+ "text": [
401
+ "INT4 blockwise clip: 66.8 MB\n"
402
+ ]
403
+ },
404
+ {
405
+ "data": {
406
+ "application/vnd.jupyter.widget-view+json": {
407
+ "model_id": "27fdf6c884d9490f8909bf33a088a1c4",
408
+ "version_major": 2,
409
+ "version_minor": 0
410
+ },
411
+ "text/plain": [
412
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
413
+ ]
414
+ },
415
+ "metadata": {},
416
+ "output_type": "display_data"
417
+ },
418
+ {
419
+ "name": "stdout",
420
+ "output_type": "stream",
421
+ "text": [
422
+ "Success: 1\n",
423
+ "Failed: 0\n"
424
+ ]
425
+ }
426
+ ],
427
+ "source": [
428
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
429
+ " from ai_edge_quantizer import recipe as aie_recipe\n",
430
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
431
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
432
+ " import os\n",
433
+ "\n",
434
+ " # 1. FP32 clip을 INT4 blockwise로 양자화\n",
435
+ " SRC = \"/tmp/sdxl_tflite/clip.tflite\"\n",
436
+ " TEST_DIR = \"/tmp/sdxl_int4_test\"\n",
437
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
438
+ "\n",
439
+ " qt.update_quantization_recipe(\n",
440
+ " regex=\".*\",\n",
441
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
442
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
443
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
444
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
445
+ " num_bits=4,\n",
446
+ " symmetric=True,\n",
447
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
448
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
449
+ " ),\n",
450
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
451
+ " explicit_dequantize=False,\n",
452
+ " ),\n",
453
+ " )\n",
454
+ " result = qt.quantize()\n",
455
+ " int4_path = f\"{TEST_DIR}/clip_int4.tflite\"\n",
456
+ " with open(int4_path, \"wb\") as f:\n",
457
+ " f.write(result.quantized_model)\n",
458
+ " print(f\"INT4 blockwise clip: {os.path.getsize(int4_path) / 1024 / 1024:.1f} MB\")\n",
459
+ "\n",
460
+ " # AOT 컴파일 테스트\n",
461
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
462
+ " aot_result = aot_lib.aot_compile(\n",
463
+ " int4_path,\n",
464
+ " target=[sm8850_target],\n",
465
+ " keep_going=True,\n",
466
+ " )\n",
467
+ " print(f\"Success: {len(aot_result.models_with_backend)}\")\n",
468
+ " print(f\"Failed: {len(aot_result.failed_backends)}\")\n",
469
+ " for backend, error in aot_result.failed_backends:\n",
470
+ " print(f\" Error: {error[:300]}\")"
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "execution_count": null,
476
+ "id": "c9974fa3-8be4-4110-9194-892d05219ec7",
477
+ "metadata": {},
478
+ "outputs": [],
479
+ "source": [
480
+ " COMPONENTS = [\"open_clip\", \"decoder\", \"diffusion\"]\n",
481
+ " SRC_DIR = \"/tmp/sdxl_tflite\"\n",
482
+ "\n",
483
+ " for name in COMPONENTS:\n",
484
+ " src = f\"{SRC_DIR}/{name}.tflite\"\n",
485
+ " print(f\"\\n=== {name} ===\")\n",
486
+ "\n",
487
+ " qt = aie_quantizer.Quantizer(src)\n",
488
+ " qt.update_quantization_recipe(\n",
489
+ " regex=\".*\",\n",
490
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
491
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
492
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
493
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
494
+ " num_bits=4,\n",
495
+ " symmetric=True,\n",
496
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
497
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
498
+ " ),\n",
499
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
500
+ " explicit_dequantize=False,\n",
501
+ " ),\n",
502
+ " )\n",
503
+ " result = qt.quantize()\n",
504
+ " out_path = f\"{TEST_DIR}/{name}_int4.tflite\"\n",
505
+ " with open(out_path, \"wb\") as f:\n",
506
+ " f.write(result.quantized_model)\n",
507
+ "\n",
508
+ " orig_mb = os.path.getsize(src) / 1024 / 1024\n",
509
+ " quant_mb = os.path.getsize(out_path) / 1024 / 1024\n",
510
+ " print(f\" Size: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
511
+ "\n",
512
+ " aot_result = aot_lib.aot_compile(\n",
513
+ " out_path,\n",
514
+ " target=[sm8850_target],\n",
515
+ " keep_going=True,\n",
516
+ " )\n",
517
+ " print(f\" AOT: Success={len(aot_result.models_with_backend)}, Failed={len(aot_result.failed_backends)}\")\n",
518
+ " for backend, error in aot_result.failed_backends:\n",
519
+ " print(f\" Error: {error[:300]}\")"
520
+ ]
521
+ },
522
+ {
523
+ "cell_type": "code",
524
+ "execution_count": 10,
525
+ "id": "69c9286b-c246-438b-ac94-d5d7774de151",
526
+ "metadata": {},
527
+ "outputs": [
528
+ {
529
+ "name": "stdout",
530
+ "output_type": "stream",
531
+ "text": [
532
+ "Quantize: 469.6 MB -> 469.6 MB (100%)\n"
533
+ ]
534
+ },
535
+ {
536
+ "data": {
537
+ "application/vnd.jupyter.widget-view+json": {
538
+ "model_id": "7f78061d12674ad286882e3856f4e638",
539
+ "version_major": 2,
540
+ "version_minor": 0
541
+ },
542
+ "text/plain": [
543
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
544
+ ]
545
+ },
546
+ "metadata": {},
547
+ "output_type": "display_data"
548
+ },
549
+ {
550
+ "name": "stdout",
551
+ "output_type": "stream",
552
+ "text": [
553
+ "AOT: Success=1, Failed=0\n"
554
+ ]
555
+ }
556
+ ],
557
+ "source": [
558
+ " import os\n",
559
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
560
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
561
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
562
+ "\n",
563
+ " SRC = \"/workspace/sdxl_tflite/clip.tflite\"\n",
564
+ " TEST_DIR = \"/tmp/sdxl_int8_blockwise_test\"\n",
565
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
566
+ "\n",
567
+ " # INT8 BLOCKWISE_32\n",
568
+ " qt = aie_quantizer.Quantizer(SRC)\n",
569
+ " qt.update_quantization_recipe(\n",
570
+ " regex=\".*\",\n",
571
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
572
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
573
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
574
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
575
+ " num_bits=8,\n",
576
+ " symmetric=True,\n",
577
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
578
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
579
+ " ),\n",
580
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
581
+ " explicit_dequantize=False,\n",
582
+ " ),\n",
583
+ " )\n",
584
+ " result = qt.quantize()\n",
585
+ " out_path = f\"{TEST_DIR}/clip_int8_bw32.tflite\"\n",
586
+ " with open(out_path, \"wb\") as f:\n",
587
+ " f.write(result.quantized_model)\n",
588
+ "\n",
589
+ " orig_mb = os.path.getsize(SRC) / 1024 / 1024\n",
590
+ " quant_mb = os.path.getsize(out_path) / 1024 / 1024\n",
591
+ " print(f\"Quantize: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
592
+ "\n",
593
+ " # AOT\n",
594
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
595
+ " aot_result = aot_lib.aot_compile(\n",
596
+ " out_path,\n",
597
+ " target=[sm8850_target],\n",
598
+ " keep_going=True,\n",
599
+ " )\n",
600
+ " print(f\"AOT: Success={len(aot_result.models_with_backend)}, Failed={len(aot_result.failed_backends)}\")\n",
601
+ " for backend, error in aot_result.failed_backends:\n",
602
+ " print(f\"Error: {error[:300]}\")"
603
+ ]
604
+ },
605
+ {
606
+ "cell_type": "code",
607
+ "execution_count": null,
608
+ "id": "34754c5d-7180-4f3a-9171-de27988a28d9",
609
+ "metadata": {},
610
+ "outputs": [
611
+ {
612
+ "name": "stderr",
613
+ "output_type": "stream",
614
+ "text": [
615
+ "/usr/local/lib/python3.12/dist-packages/tensorflow/lite/python/interpreter.py:457: UserWarning: Warning: tf.lite.Interpreter is deprecated and is scheduled for deletion in\n",
616
+ " TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.\n",
617
+ " See the [migration guide](https://ai.google.dev/edge/litert/migration)\n",
618
+ " for details.\n",
619
+ " \n",
620
+ " warnings.warn(_INTERPRETER_DELETION_WARNING)\n"
621
+ ]
622
+ }
623
+ ],
624
+ "source": [
625
+ " # 양자화 전후 모델 내부 확인\n",
626
+ " import tensorflow as tf\n",
627
+ "\n",
628
+ " orig = tf.lite.Interpreter(model_path=\"/workspace/sdxl_tflite/clip.tflite\")\n",
629
+ " orig.allocate_tensors()\n",
630
+ "\n",
631
+ " quant = tf.lite.Interpreter(model_path=\"/tmp/sdxl_int8_blockwise_test/clip_int8_bw32.tflite\")\n",
632
+ " quant.allocate_tensors()\n",
633
+ "\n",
634
+ " # 가중치 텐서 dtype 비교\n",
635
+ " print(\"=== Original ===\")\n",
636
+ " for t in orig.get_tensor_details()[:5]:\n",
637
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")\n",
638
+ "\n",
639
+ " print(\"\\n=== INT8 blockwise ===\")\n",
640
+ " for t in quant.get_tensor_details()[:5]:\n",
641
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")\n",
642
+ "\n",
643
+ " # INT4 것도 비교\n",
644
+ " int4 = tf.lite.Interpreter(model_path=\"/tmp/sdxl_int4_test/clip_int4.tflite\")\n",
645
+ " int4.allocate_tensors()\n",
646
+ " print(\"\\n=== INT4 blockwise ===\")\n",
647
+ " for t in int4.get_tensor_details()[:5]:\n",
648
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")"
649
+ ]
650
+ },
651
+ {
652
+ "cell_type": "code",
653
+ "execution_count": 2,
654
+ "id": "98db8f3d-1099-468a-87b2-e7f60431b948",
655
+ "metadata": {},
656
+ "outputs": [
657
+ {
658
+ "name": "stdout",
659
+ "output_type": "stream",
660
+ "text": [
661
+ "Original: 469.6 MB\n",
662
+ "Quantized bytes: 469.6 MB\n"
663
+ ]
664
+ },
665
+ {
666
+ "name": "stderr",
667
+ "output_type": "stream",
668
+ "text": [
669
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
670
+ " ret = np.divide(tensor_data, scales)\n",
671
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
672
+ " return tensor.astype(qtype)\n"
673
+ ]
674
+ },
675
+ {
676
+ "name": "stdout",
677
+ "output_type": "stream",
678
+ "text": [
679
+ "INT4 quantized bytes: 66.8 MB\n"
680
+ ]
681
+ }
682
+ ],
683
+ "source": [
684
+ " import os\n",
685
+ "\n",
686
+ " # 양자화만 다시 해서 확인 (AOT 안 거침)\n",
687
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
688
+ "\n",
689
+ " SRC = \"/workspace/sdxl_tflite/clip.tflite\"\n",
690
+ "\n",
691
+ " # INT8 blockwise 양자화만\n",
692
+ " qt = aie_quantizer.Quantizer(SRC)\n",
693
+ " qt.update_quantization_recipe(\n",
694
+ " regex=\".*\",\n",
695
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
696
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
697
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
698
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
699
+ " num_bits=8,\n",
700
+ " symmetric=True,\n",
701
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
702
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
703
+ " ),\n",
704
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
705
+ " explicit_dequantize=False,\n",
706
+ " ),\n",
707
+ " )\n",
708
+ " result = qt.quantize()\n",
709
+ "\n",
710
+ " # 양자화 결과 통계 확인\n",
711
+ " print(f\"Original: {os.path.getsize(SRC) / 1024 / 1024:.1f} MB\")\n",
712
+ " print(f\"Quantized bytes: {len(result.quantized_model) / 1024 / 1024:.1f} MB\")\n",
713
+ "\n",
714
+ " # 양자화 로그 확인\n",
715
+ " if hasattr(result, 'log'):\n",
716
+ " print(f\"Log: {result.log}\")\n",
717
+ " if hasattr(result, 'skipped_ops'):\n",
718
+ " print(f\"Skipped: {result.skipped_ops}\")\n",
719
+ "\n",
720
+ " # 비교: INT4도 같은 방식으로 크기 확인\n",
721
+ " qt4 = aie_quantizer.Quantizer(SRC)\n",
722
+ " qt4.update_quantization_recipe(\n",
723
+ " regex=\".*\",\n",
724
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
725
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
726
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
727
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
728
+ " num_bits=4,\n",
729
+ " symmetric=True,\n",
730
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
731
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
732
+ " ),\n",
733
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
734
+ " explicit_dequantize=False,\n",
735
+ " ),\n",
736
+ " )\n",
737
+ " result4 = qt4.quantize()\n",
738
+ " print(f\"INT4 quantized bytes: {len(result4.quantized_model) / 1024 / 1024:.1f} MB\")\n"
739
+ ]
740
+ },
741
+ {
742
+ "cell_type": "code",
743
+ "execution_count": null,
744
+ "id": "98d994de-58ff-4741-9521-f8d0823ff089",
745
+ "metadata": {},
746
+ "outputs": [],
747
+ "source": []
748
+ }
749
+ ],
750
+ "metadata": {
751
+ "kernelspec": {
752
+ "display_name": "Python 3 (ipykernel)",
753
+ "language": "python",
754
+ "name": "python3"
755
+ },
756
+ "language_info": {
757
+ "codemirror_mode": {
758
+ "name": "ipython",
759
+ "version": 3
760
+ },
761
+ "file_extension": ".py",
762
+ "mimetype": "text/x-python",
763
+ "name": "python",
764
+ "nbconvert_exporter": "python",
765
+ "pygments_lexer": "ipython3",
766
+ "version": "3.12.3"
767
+ }
768
+ },
769
+ "nbformat": 4,
770
+ "nbformat_minor": 5
771
+ }
.ipynb_checkpoints/sdxl_porting_example-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Untitled.ipynb ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "950d36bf-2792-434e-920d-31954ec49878",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "text_projection.npy: 6.3 MB\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ " import os\n",
19
+ " npy_path = \"/workspace/sdxl_tflite/text_projection.npy\"\n",
20
+ " size_mb = os.path.getsize(npy_path) / 1024 / 1024\n",
21
+ " print(f\"text_projection.npy: {size_mb:.1f} MB\")"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "id": "0db2e3ce-274a-4af3-bdd3-87886afd603a",
28
+ "metadata": {},
29
+ "outputs": [
30
+ {
31
+ "name": "stdout",
32
+ "output_type": "stream",
33
+ "text": [
34
+ "Done: (1280, 1280), float32\n"
35
+ ]
36
+ }
37
+ ],
38
+ "source": [
39
+ " import numpy as np\n",
40
+ " tp = np.load(\"/workspace/sdxl_tflite_fp16/text_projection.npy\")\n",
41
+ " tp.astype(np.float32).tofile(\"/workspace/sdxl_tflite_fp16/text_projection.bin\")\n",
42
+ " print(f\"Done: {tp.shape}, {tp.dtype}\")"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 4,
48
+ "id": "eb0f13c8-4ac0-4e8e-b717-5e4723b49e93",
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "name": "stdout",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "/workspace/sdxl_tflite/clip.tflite: 470 MB\n",
56
+ "/workspace/sdxl_tflite/open_clip.tflite: 2644 MB\n",
57
+ "/workspace/sdxl_tflite/diffusion.tflite: 9800 MB\n",
58
+ "/workspace/sdxl_tflite/decoder.tflite: 189 MB\n",
59
+ "\n",
60
+ "/workspace/sdxl_tflite_fp16/clip.tflite: 235 MB\n",
61
+ "/workspace/sdxl_tflite_fp16/open_clip.tflite: 1323 MB\n",
62
+ "/workspace/sdxl_tflite_fp16/diffusion.tflite: 4906 MB\n",
63
+ "/workspace/sdxl_tflite_fp16/decoder.tflite: 95 MB\n",
64
+ "\n"
65
+ ]
66
+ }
67
+ ],
68
+ "source": [
69
+ " import os\n",
70
+ " for d in [\"/workspace/sdxl_tflite\", \"/workspace/sdxl_tflite_fp16\"]:\n",
71
+ " if os.path.exists(d):\n",
72
+ " for f in [\"clip.tflite\", \"open_clip.tflite\", \"diffusion.tflite\", \"decoder.tflite\"]:\n",
73
+ " path = f\"{d}/{f}\"\n",
74
+ " if os.path.exists(path):\n",
75
+ " size = os.path.getsize(path) / 1024 / 1024\n",
76
+ " print(f\"{d}/{f}: {size:.0f} MB\")\n",
77
+ " print()"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": 5,
83
+ "id": "4409a65f-e9b8-417a-98a3-6683c8e3d6f2",
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "name": "stdout",
88
+ "output_type": "stream",
89
+ "text": [
90
+ "FP16 clip: 235 MB\n"
91
+ ]
92
+ }
93
+ ],
94
+ "source": [
95
+ " # 원격 서버에서 실행\n",
96
+ " import os\n",
97
+ " size = os.path.getsize(\"/workspace/sdxl_tflite_fp16/clip.tflite\")\n",
98
+ " print(f\"FP16 clip: {size / 1024 / 1024:.0f} MB\")"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": 6,
104
+ "id": "d1b11b7e-3acd-4f78-a091-36f628198fc3",
105
+ "metadata": {},
106
+ "outputs": [
107
+ {
108
+ "name": "stdout",
109
+ "output_type": "stream",
110
+ "text": [
111
+ "/workspace/sdxl_tflite_quantized/clip.tflite: 120 MB\n",
112
+ "/workspace/sdxl_tflite_quantized/open_clip.tflite: 668 MB\n",
113
+ "/workspace/sdxl_tflite_quantized/diffusion.tflite: 2476 MB\n",
114
+ "/workspace/sdxl_tflite_quantized/decoder.tflite: 48 MB\n"
115
+ ]
116
+ }
117
+ ],
118
+ "source": [
119
+ " import os\n",
120
+ " # INT8 모델 경로 확인\n",
121
+ " for d in [\"/workspace/sdxl_tflite_quantized\", \"/tmp/sdxl_tflite_quantized\"]:\n",
122
+ " if os.path.exists(d):\n",
123
+ " for f in os.listdir(d):\n",
124
+ " if f.endswith('.tflite'):\n",
125
+ " size = os.path.getsize(f\"{d}/{f}\") / 1024 / 1024\n",
126
+ " print(f\"{d}/{f}: {size:.0f} MB\")"
127
+ ]
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "execution_count": null,
132
+ "id": "3b16e345-80ad-495f-8151-8a1664b5446f",
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": []
136
+ }
137
+ ],
138
+ "metadata": {
139
+ "kernelspec": {
140
+ "display_name": "Python 3 (ipykernel)",
141
+ "language": "python",
142
+ "name": "python3"
143
+ },
144
+ "language_info": {
145
+ "codemirror_mode": {
146
+ "name": "ipython",
147
+ "version": 3
148
+ },
149
+ "file_extension": ".py",
150
+ "mimetype": "text/x-python",
151
+ "name": "python",
152
+ "nbconvert_exporter": "python",
153
+ "pygments_lexer": "ipython3",
154
+ "version": "3.12.3"
155
+ }
156
+ },
157
+ "nbformat": 4,
158
+ "nbformat_minor": 5
159
+ }
sdxl_ai_pack/device_targeting_configuration.xml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <config:device-targeting-config
2
+ xmlns:config="http://schemas.android.com/apk/config">
3
+ <config:device-group name="Qualcomm_SM8850">
4
+ <config:device-selector>
5
+ <config:system-on-chip manufacturer="Qualcomm" model="SM8850"/>
6
+ </config:device-selector>
7
+ <config:device-selector>
8
+ <config:system-on-chip manufacturer="QTI" model="SM8850"/>
9
+ </config:device-selector>
10
+ </config:device-group>
11
+ </config:device-targeting-config>
sdxl_ai_pack/sdxl_clip_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_clip_mtk/src/main/assets/model#group_other/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_decoder_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_decoder_mtk/src/main/assets/model#group_other/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_diffusion/src/main/assets/model#group_Qualcomm_SM8850/diffusion.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c467a24fc1ae7aa397e74f0480ba981bdebe4b94b5808cb6a220bc350816acf
3
+ size 2587049280
sdxl_ai_pack/sdxl_diffusion_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_diffusion_mtk/src/main/assets/model#group_other/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_open_clip_mtk/src/main/assets/model#group_Qualcomm_SM8850/placeholder.txt ADDED
File without changes
sdxl_ai_pack/sdxl_open_clip_mtk/src/main/assets/model#group_other/placeholder.txt ADDED
File without changes
sdxl_int4_aot_quantization.ipynb ADDED
@@ -0,0 +1,1485 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "id": "30781904-bd35-473d-9016-1f33ed9f036f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "\n",
14
+ "==================================================\n",
15
+ "[clip] Quantizing...\n"
16
+ ]
17
+ },
18
+ {
19
+ "name": "stderr",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
23
+ " ret = np.divide(tensor_data, scales)\n",
24
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
25
+ " return tensor.astype(qtype)\n"
26
+ ]
27
+ },
28
+ {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "[clip] Size: 469.6 MB -> 66.8 MB (14%)\n",
33
+ "[clip] AOT compiling for SM8850...\n"
34
+ ]
35
+ },
36
+ {
37
+ "data": {
38
+ "application/vnd.jupyter.widget-view+json": {
39
+ "model_id": "d5ab30865c774458b49fb8b7586e314f",
40
+ "version_major": 2,
41
+ "version_minor": 0
42
+ },
43
+ "text/plain": [
44
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
45
+ ]
46
+ },
47
+ "metadata": {},
48
+ "output_type": "display_data"
49
+ },
50
+ {
51
+ "name": "stdout",
52
+ "output_type": "stream",
53
+ "text": [
54
+ "[clip] AOT: Success=1, Failed=0\n",
55
+ "\n",
56
+ "==================================================\n",
57
+ "[open_clip] Quantizing...\n"
58
+ ]
59
+ },
60
+ {
61
+ "name": "stderr",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: divide by zero encountered in divide\n",
65
+ " ret = np.divide(tensor_data, scales)\n"
66
+ ]
67
+ },
68
+ {
69
+ "name": "stdout",
70
+ "output_type": "stream",
71
+ "text": [
72
+ "[open_clip] Size: 2643.7 MB -> 374.0 MB (14%)\n",
73
+ "[open_clip] AOT compiling for SM8850...\n"
74
+ ]
75
+ },
76
+ {
77
+ "data": {
78
+ "application/vnd.jupyter.widget-view+json": {
79
+ "model_id": "0f7cde62157f4eb48338fedab0d8747a",
80
+ "version_major": 2,
81
+ "version_minor": 0
82
+ },
83
+ "text/plain": [
84
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
85
+ ]
86
+ },
87
+ "metadata": {},
88
+ "output_type": "display_data"
89
+ },
90
+ {
91
+ "name": "stdout",
92
+ "output_type": "stream",
93
+ "text": [
94
+ "[open_clip] AOT: Success=1, Failed=0\n",
95
+ "\n",
96
+ "==================================================\n",
97
+ "[diffusion] Quantizing...\n",
98
+ "[diffusion] Size: 9799.6 MB -> 2480.8 MB (25%)\n",
99
+ "[diffusion] AOT compiling for SM8850...\n"
100
+ ]
101
+ },
102
+ {
103
+ "data": {
104
+ "application/vnd.jupyter.widget-view+json": {
105
+ "model_id": "28a7fba90a074601b1834938352c8193",
106
+ "version_major": 2,
107
+ "version_minor": 0
108
+ },
109
+ "text/plain": [
110
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
111
+ ]
112
+ },
113
+ "metadata": {},
114
+ "output_type": "display_data"
115
+ },
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "[diffusion] AOT: Success=1, Failed=0\n",
121
+ "\n",
122
+ "==================================================\n",
123
+ "[decoder] Quantizing...\n",
124
+ "[decoder] Size: 189.2 MB -> 185.8 MB (98%)\n",
125
+ "[decoder] AOT compiling for SM8850...\n"
126
+ ]
127
+ },
128
+ {
129
+ "data": {
130
+ "application/vnd.jupyter.widget-view+json": {
131
+ "model_id": "e382a5c82a6f45a9aece16b94fd090d8",
132
+ "version_major": 2,
133
+ "version_minor": 0
134
+ },
135
+ "text/plain": [
136
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
137
+ ]
138
+ },
139
+ "metadata": {},
140
+ "output_type": "display_data"
141
+ },
142
+ {
143
+ "name": "stdout",
144
+ "output_type": "stream",
145
+ "text": [
146
+ "[decoder] AOT: Success=1, Failed=0\n",
147
+ "\n",
148
+ "==================================================\n",
149
+ "Summary:\n",
150
+ " clip: OK\n",
151
+ " open_clip: OK\n",
152
+ " diffusion: OK\n",
153
+ " decoder: OK\n",
154
+ "\n",
155
+ "Output: /workspace/sdxl_tflite_aot_int4_blockwise_32\n",
156
+ "Files: ['clip_int4.tflite', 'clip_int4_Qualcomm_SM8850_apply_plugin.tflite', 'open_clip_int4.tflite', 'open_clip_int4_Qualcomm_SM8850_apply_plugin.tflite', 'diffusion_int4.tflite', 'diffusion_int4_Qualcomm_SM8850_apply_plugin.tflite', 'decoder_int4.tflite', 'decoder_int4_Qualcomm_SM8850_apply_plugin.tflite']\n"
157
+ ]
158
+ }
159
+ ],
160
+ "source": [
161
+ " import os\n",
162
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
163
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
164
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
165
+ "\n",
166
+ " SRC_DIR = \"/workspace/sdxl_tflite\"\n",
167
+ " OUT_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
168
+ " os.makedirs(OUT_DIR, exist_ok=True)\n",
169
+ "\n",
170
+ " COMPONENTS = [\"clip\", \"open_clip\", \"diffusion\", \"decoder\"]\n",
171
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
172
+ "\n",
173
+ " int4_config = aie_quantizer.qtyping.OpQuantizationConfig(\n",
174
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
175
+ " num_bits=4,\n",
176
+ " symmetric=True,\n",
177
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
178
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
179
+ " ),\n",
180
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
181
+ " explicit_dequantize=False,\n",
182
+ " )\n",
183
+ "\n",
184
+ " results = {}\n",
185
+ "\n",
186
+ " for name in COMPONENTS:\n",
187
+ " src = f\"{SRC_DIR}/{name}.tflite\"\n",
188
+ " int4_path = f\"{OUT_DIR}/{name}_int4.tflite\"\n",
189
+ " print(f\"\\n{'='*50}\")\n",
190
+ " print(f\"[{name}] Quantizing...\")\n",
191
+ "\n",
192
+ " qt = aie_quantizer.Quantizer(src)\n",
193
+ " qt.update_quantization_recipe(\n",
194
+ " regex=\".*\",\n",
195
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
196
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
197
+ " op_config=int4_config,\n",
198
+ " )\n",
199
+ " quant_result = qt.quantize()\n",
200
+ " with open(int4_path, \"wb\") as f:\n",
201
+ " f.write(quant_result.quantized_model)\n",
202
+ "\n",
203
+ " orig_mb = os.path.getsize(src) / 1024 / 1024\n",
204
+ " quant_mb = os.path.getsize(int4_path) / 1024 / 1024\n",
205
+ " print(f\"[{name}] Size: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
206
+ "\n",
207
+ " print(f\"[{name}] AOT compiling for SM8850...\")\n",
208
+ " aot_result = aot_lib.aot_compile(\n",
209
+ " int4_path,\n",
210
+ " output_dir=OUT_DIR,\n",
211
+ " target=[sm8850_target],\n",
212
+ " keep_going=True,\n",
213
+ " )\n",
214
+ "\n",
215
+ " success = len(aot_result.models_with_backend)\n",
216
+ " failed = len(aot_result.failed_backends)\n",
217
+ " results[name] = {\"success\": success, \"failed\": failed}\n",
218
+ " print(f\"[{name}] AOT: Success={success}, Failed={failed}\")\n",
219
+ " for backend, error in aot_result.failed_backends:\n",
220
+ " print(f\"[{name}] Error: {error[:300]}\")\n",
221
+ "\n",
222
+ " print(f\"\\n{'='*50}\")\n",
223
+ " print(\"Summary:\")\n",
224
+ " for name, r in results.items():\n",
225
+ " status = \"OK\" if r[\"success\"] > 0 else \"FAIL\"\n",
226
+ " print(f\" {name}: {status}\")\n",
227
+ " print(f\"\\nOutput: {OUT_DIR}\")\n",
228
+ " print(f\"Files: {os.listdir(OUT_DIR)}\")"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "markdown",
233
+ "id": "ae2cb21d-e31f-464d-96f3-91218650b94f",
234
+ "metadata": {},
235
+ "source": [
236
+ "위에는 int4 blockwise 32 양자화, 아래는 그걸 ai pack로 합치는 코드"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": 4,
242
+ "id": "09cf7c20-3b5c-4eba-aa43-5caee7180cc0",
243
+ "metadata": {
244
+ "scrolled": true
245
+ },
246
+ "outputs": [
247
+ {
248
+ "name": "stdout",
249
+ "output_type": "stream",
250
+ "text": [
251
+ "\n",
252
+ "=== clip ===\n"
253
+ ]
254
+ },
255
+ {
256
+ "data": {
257
+ "application/vnd.jupyter.widget-view+json": {
258
+ "model_id": "83ea5774c8564e3088f1ba34719269f0",
259
+ "version_major": 2,
260
+ "version_minor": 0
261
+ },
262
+ "text/plain": [
263
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
264
+ ]
265
+ },
266
+ "metadata": {},
267
+ "output_type": "display_data"
268
+ },
269
+ {
270
+ "name": "stdout",
271
+ "output_type": "stream",
272
+ "text": [
273
+ " AOT: Success=2 (including fallback)\n",
274
+ " AI Pack exported: sdxl_clip\n",
275
+ "\n",
276
+ "=== open_clip ===\n"
277
+ ]
278
+ },
279
+ {
280
+ "data": {
281
+ "application/vnd.jupyter.widget-view+json": {
282
+ "model_id": "b8b2874d25d743c4b33b09ad1f8b70c3",
283
+ "version_major": 2,
284
+ "version_minor": 0
285
+ },
286
+ "text/plain": [
287
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
288
+ ]
289
+ },
290
+ "metadata": {},
291
+ "output_type": "display_data"
292
+ },
293
+ {
294
+ "name": "stdout",
295
+ "output_type": "stream",
296
+ "text": [
297
+ " AOT: Success=2 (including fallback)\n",
298
+ " AI Pack exported: sdxl_open_clip\n",
299
+ "\n",
300
+ "=== diffusion ===\n"
301
+ ]
302
+ },
303
+ {
304
+ "data": {
305
+ "application/vnd.jupyter.widget-view+json": {
306
+ "model_id": "fe62dc74e729467db4af87dde7bda315",
307
+ "version_major": 2,
308
+ "version_minor": 0
309
+ },
310
+ "text/plain": [
311
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
312
+ ]
313
+ },
314
+ "metadata": {},
315
+ "output_type": "display_data"
316
+ },
317
+ {
318
+ "name": "stdout",
319
+ "output_type": "stream",
320
+ "text": [
321
+ " AOT: Success=2 (including fallback)\n",
322
+ " AI Pack exported: sdxl_diffusion\n",
323
+ "\n",
324
+ "=== decoder ===\n"
325
+ ]
326
+ },
327
+ {
328
+ "data": {
329
+ "application/vnd.jupyter.widget-view+json": {
330
+ "model_id": "829a4709709e462ca8f64b004d6a169a",
331
+ "version_major": 2,
332
+ "version_minor": 0
333
+ },
334
+ "text/plain": [
335
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
336
+ ]
337
+ },
338
+ "metadata": {},
339
+ "output_type": "display_data"
340
+ },
341
+ {
342
+ "name": "stdout",
343
+ "output_type": "stream",
344
+ "text": [
345
+ " AOT: Success=2 (including fallback)\n",
346
+ " AI Pack exported: sdxl_decoder\n",
347
+ "\n",
348
+ "==================================================\n",
349
+ "AI Pack directory: /workspace/sdxl_ai_pack\n",
350
+ "sdxl_ai_pack/\n",
351
+ " device_targeting_configuration.xml\n",
352
+ " sdxl_clip/\n",
353
+ " src/\n",
354
+ " main/\n",
355
+ " assets/\n",
356
+ " model#group_Qualcomm_SM8850/\n",
357
+ " clip.tflite\n",
358
+ " model#group_other/\n",
359
+ " clip.tflite\n",
360
+ " sdxl_clip_mtk/\n",
361
+ " src/\n",
362
+ " main/\n",
363
+ " assets/\n",
364
+ " model#group_Qualcomm_SM8850/\n",
365
+ " placeholder.txt\n",
366
+ " model#group_other/\n",
367
+ " placeholder.txt\n",
368
+ " sdxl_open_clip/\n",
369
+ " src/\n",
370
+ " main/\n",
371
+ " assets/\n",
372
+ " model#group_Qualcomm_SM8850/\n",
373
+ " open_clip.tflite\n",
374
+ " model#group_other/\n",
375
+ " open_clip.tflite\n",
376
+ " sdxl_open_clip_mtk/\n",
377
+ " src/\n",
378
+ " main/\n",
379
+ " assets/\n",
380
+ " model#group_Qualcomm_SM8850/\n",
381
+ " placeholder.txt\n",
382
+ " model#group_other/\n",
383
+ " placeholder.txt\n",
384
+ " sdxl_diffusion/\n",
385
+ " src/\n",
386
+ " main/\n",
387
+ " assets/\n",
388
+ " model#group_Qualcomm_SM8850/\n",
389
+ " diffusion.tflite\n",
390
+ " model#group_other/\n",
391
+ " diffusion.tflite\n",
392
+ " sdxl_diffusion_mtk/\n",
393
+ " src/\n",
394
+ " main/\n",
395
+ " assets/\n",
396
+ " model#group_Qualcomm_SM8850/\n",
397
+ " placeholder.txt\n",
398
+ " model#group_other/\n",
399
+ " placeholder.txt\n",
400
+ " sdxl_decoder/\n",
401
+ " src/\n",
402
+ " main/\n",
403
+ " assets/\n",
404
+ " model#group_Qualcomm_SM8850/\n",
405
+ " decoder.tflite\n",
406
+ " model#group_other/\n",
407
+ " decoder.tflite\n",
408
+ " sdxl_decoder_mtk/\n",
409
+ " src/\n",
410
+ " main/\n",
411
+ " assets/\n",
412
+ " model#group_Qualcomm_SM8850/\n",
413
+ " placeholder.txt\n",
414
+ " model#group_other/\n",
415
+ " placeholder.txt\n"
416
+ ]
417
+ }
418
+ ],
419
+ "source": [
420
+ " import os\n",
421
+ " import pathlib\n",
422
+ " import shutil\n",
423
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
424
+ " from ai_edge_litert.aot.ai_pack import export_lib as ai_pack_export\n",
425
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
426
+ " from ai_edge_litert.aot.core import types\n",
427
+ " from ai_edge_litert.aot.vendors.fallback_backend import FallbackBackend\n",
428
+ "\n",
429
+ " OUT_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
430
+ " AI_PACK_DIR = \"/workspace/sdxl_ai_pack\"\n",
431
+ " COMPONENTS = [\"clip\", \"open_clip\", \"diffusion\", \"decoder\"]\n",
432
+ "\n",
433
+ " shutil.rmtree(AI_PACK_DIR, ignore_errors=True)\n",
434
+ "\n",
435
+ " for name in COMPONENTS:\n",
436
+ " int4_path = f\"{OUT_DIR}/{name}_int4.tflite\"\n",
437
+ " print(f\"\\n=== {name} ===\")\n",
438
+ "\n",
439
+ " # SM8850 AOT 컴파일\n",
440
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
441
+ " result = aot_lib.aot_compile(\n",
442
+ " int4_path,\n",
443
+ " target=[sm8850_target],\n",
444
+ " keep_going=True,\n",
445
+ " )\n",
446
+ "\n",
447
+ " # fallback 모델 추가 (INT4 양자화된 tflite를 CPU/GPU fallback으로 사용)\n",
448
+ " fallback_backend = FallbackBackend.create({\"backend_id\": FallbackBackend.id()})\n",
449
+ " fallback_model = types.Model.create_from_path(pathlib.Path(int4_path))\n",
450
+ " result.models_with_backend.append((fallback_backend, fallback_model))\n",
451
+ "\n",
452
+ " print(f\" AOT: Success={len(result.models_with_backend)} (including fallback)\")\n",
453
+ "\n",
454
+ " # AI Pack export\n",
455
+ " ai_pack_export.export(\n",
456
+ " compiled_models=result,\n",
457
+ " ai_pack_dir=AI_PACK_DIR,\n",
458
+ " ai_pack_name=f\"sdxl_{name}\",\n",
459
+ " litert_model_name=name,\n",
460
+ " )\n",
461
+ " print(f\" AI Pack exported: sdxl_{name}\")\n",
462
+ "\n",
463
+ " # 결과 확인\n",
464
+ " print(f\"\\n{'='*50}\")\n",
465
+ " print(f\"AI Pack directory: {AI_PACK_DIR}\")\n",
466
+ " for root, dirs, files in os.walk(AI_PACK_DIR):\n",
467
+ " level = root.replace(AI_PACK_DIR, '').count(os.sep)\n",
468
+ " indent = ' ' * level\n",
469
+ " print(f\"{indent}{os.path.basename(root)}/\")\n",
470
+ " subindent = ' ' * (level + 1)\n",
471
+ " for f in files:\n",
472
+ " print(f\"{subindent}{f}\")"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "markdown",
477
+ "id": "5cb03868-abfe-46ba-aaf0-7752532626c2",
478
+ "metadata": {},
479
+ "source": [
480
+ "위에는 ai_pack로 패킹하는 코드 아래는 이미지 생성하는 테스트 코드"
481
+ ]
482
+ },
483
+ {
484
+ "cell_type": "code",
485
+ "execution_count": 9,
486
+ "id": "0b8067e3-b1d9-497a-938e-3bac70175efa",
487
+ "metadata": {},
488
+ "outputs": [],
489
+ "source": [
490
+ "\n",
491
+ " # import os\n",
492
+ " # import time\n",
493
+ " # import numpy as np\n",
494
+ " # import torch\n",
495
+ " # from PIL import Image\n",
496
+ " # from tqdm import auto as tqdm\n",
497
+ " # import litert_torch\n",
498
+ " # from ai_edge_litert import interpreter as interpreter_lib\n",
499
+ " # from litert_torch.generative.examples.stable_diffusion import samplers\n",
500
+ " # from litert_torch.generative.examples.stable_diffusion import tokenizer\n",
501
+ " # from litert_torch.generative.examples.stable_diffusion_xl import util\n",
502
+ "\n",
503
+ " # # ============================================================\n",
504
+ " # # 경로 설정\n",
505
+ " # # ============================================================\n",
506
+ " # MODEL_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
507
+ " # SRC_DIR = \"/workspace/sdxl_tflite\" # text_projection.npy 위치\n",
508
+ " # TOKENIZER_DIR = os.path.join(os.path.expanduser(\"~\"), \"stable-diffusion-xl/tokenizer\")\n",
509
+ " # OUTPUT_PATH = \"/workspace/sdxl_int4_generated.png\"\n",
510
+ "\n",
511
+ " # # ============================================================\n",
512
+ " # # 모델 로드\n",
513
+ " # # ============================================================\n",
514
+ " # print(\"Loading INT4 blockwise quantized models...\")\n",
515
+ "\n",
516
+ " # clip = litert_torch.load(f\"{MODEL_DIR}/clip_int4.tflite\")\n",
517
+ " # open_clip = litert_torch.load(f\"{MODEL_DIR}/open_clip_int4.tflite\")\n",
518
+ " # diffusion = litert_torch.load(f\"{MODEL_DIR}/diffusion_int4.tflite\")\n",
519
+ " # decoder = litert_torch.load(f\"{MODEL_DIR}/decoder_int4.tflite\")\n",
520
+ " # text_projection = np.load(f\"{SRC_DIR}/text_projection.npy\")\n",
521
+ " # tok = tokenizer.Tokenizer(TOKENIZER_DIR)\n",
522
+ "\n",
523
+ " # num_threads = os.cpu_count()\n",
524
+ " # for model in [clip, open_clip, diffusion, decoder]:\n",
525
+ " # model_bytes = model.model_content()\n",
526
+ " # model.set_interpreter_builder(\n",
527
+ " # lambda b=model_bytes: interpreter_lib.Interpreter(\n",
528
+ " # model_content=b,\n",
529
+ " # experimental_default_delegate_latest_features=False,\n",
530
+ " # num_threads=num_threads,\n",
531
+ " # )\n",
532
+ " # )\n",
533
+ " # print(f\"All models loaded. (num_threads={num_threads})\")\n",
534
+ "\n",
535
+ " # # ============================================================\n",
536
+ " # # 생성 파라미터\n",
537
+ " # # ============================================================\n",
538
+ " # PROMPT = \"a photograph of an astronaut riding a horse on the moon, high quality, detailed\"\n",
539
+ " # NEGATIVE_PROMPT = \"blurry, low quality\"\n",
540
+ " # CFG_SCALE = 7.5\n",
541
+ " # HEIGHT = 1024\n",
542
+ " # WIDTH = 1024\n",
543
+ " # N_STEPS = 20\n",
544
+ " # SEED = 42\n",
545
+ " # SAMPLER_NAME = \"k_euler\"\n",
546
+ "\n",
547
+ " # # ============================================================\n",
548
+ " # # 이미지 생성\n",
549
+ " # # ============================================================\n",
550
+ " # np.random.seed(SEED)\n",
551
+ " # start_time = time.time()\n",
552
+ "\n",
553
+ " # # 1. Sampler\n",
554
+ " # if SAMPLER_NAME == \"k_euler\":\n",
555
+ " # sampler = samplers.KEulerSampler(n_inference_steps=N_STEPS)\n",
556
+ " # elif SAMPLER_NAME == \"k_euler_ancestral\":\n",
557
+ " # sampler = samplers.KEulerAncestralSampler(n_inference_steps=N_STEPS)\n",
558
+ " # elif SAMPLER_NAME == \"k_lms\":\n",
559
+ " # sampler = samplers.KLMSSampler(n_inference_steps=N_STEPS)\n",
560
+ "\n",
561
+ " # # 2. Dual text encoding\n",
562
+ " # print(\"Step 1: Dual text encoding (CLIP-L + OpenCLIP-G)...\")\n",
563
+ " # cond_tokens = np.array(tok.encode(PROMPT)).astype(np.int32)\n",
564
+ " # uncond_tokens = np.array(tok.encode(NEGATIVE_PROMPT)).astype(np.int32)\n",
565
+ "\n",
566
+ " # cond_clip = clip(cond_tokens, signature_name=\"encode\")\n",
567
+ " # uncond_clip = clip(uncond_tokens, signature_name=\"encode\")\n",
568
+ "\n",
569
+ " # cond_open_hidden, cond_final = open_clip(cond_tokens, signature_name=\"encode\")\n",
570
+ " # uncond_open_hidden, uncond_final = open_clip(uncond_tokens, signature_name=\"encode\")\n",
571
+ "\n",
572
+ " # # EOS pooling + text_projection\n",
573
+ " # cond_eos_idx = int(np.argmax(cond_tokens))\n",
574
+ " # uncond_eos_idx = int(np.argmax(uncond_tokens))\n",
575
+ " # cond_pooled = (cond_final[0, cond_eos_idx] @ text_projection)[np.newaxis, :]\n",
576
+ " # uncond_pooled = (uncond_final[0, uncond_eos_idx] @ text_projection)[np.newaxis, :]\n",
577
+ "\n",
578
+ " # cond_context = np.concatenate([cond_clip, cond_open_hidden], axis=-1)\n",
579
+ " # uncond_context = np.concatenate([uncond_clip, uncond_open_hidden], axis=-1)\n",
580
+ " # context = np.concatenate([cond_context, uncond_context], axis=0)\n",
581
+ "\n",
582
+ " # # 3. Add embedding\n",
583
+ " # print(\"Step 2: Computing add_embedding...\")\n",
584
+ " # add_time_ids = util.get_add_time_ids(\n",
585
+ " # original_size=(HEIGHT, WIDTH),\n",
586
+ " # crop_coords=(0, 0),\n",
587
+ " # target_size=(HEIGHT, WIDTH),\n",
588
+ " # )\n",
589
+ " # time_ids_emb = util.encode_add_time_ids(add_time_ids).numpy()\n",
590
+ " # cond_add_emb = np.concatenate([cond_pooled, time_ids_emb], axis=-1)\n",
591
+ " # uncond_add_emb = np.concatenate([uncond_pooled, time_ids_emb], axis=-1)\n",
592
+ " # add_emb = np.concatenate([cond_add_emb, uncond_add_emb], axis=0).astype(np.float32)\n",
593
+ "\n",
594
+ " # # 4. Initialize latents\n",
595
+ " # print(\"Step 3: Initializing latents...\")\n",
596
+ " # noise_shape = (1, 4, HEIGHT // 8, WIDTH // 8)\n",
597
+ " # latents = np.random.normal(size=noise_shape).astype(np.float32)\n",
598
+ " # latents *= sampler.initial_scale\n",
599
+ "\n",
600
+ " # # 5. Diffusion loop\n",
601
+ " # print(f\"Step 4: Diffusion ({N_STEPS} steps)...\")\n",
602
+ " # timesteps = tqdm.tqdm(sampler.timesteps, desc=\"Denoising\")\n",
603
+ " # for _, timestep in enumerate(timesteps):\n",
604
+ " # time_embedding = util.get_time_embedding(timestep).numpy()\n",
605
+ " # input_latents = latents * sampler.get_input_scale()\n",
606
+ " # input_latents = input_latents.repeat(2, axis=0)\n",
607
+ "\n",
608
+ " # output = diffusion(\n",
609
+ " # input_latents.astype(np.float32),\n",
610
+ " # context.astype(np.float32),\n",
611
+ " # time_embedding.astype(np.float32),\n",
612
+ " # add_emb,\n",
613
+ " # signature_name=\"diffusion\",\n",
614
+ " # )\n",
615
+ " # output_cond, output_uncond = np.split(output, 2, axis=0)\n",
616
+ " # output = CFG_SCALE * (output_cond - output_uncond) + output_uncond\n",
617
+ " # latents = sampler.step(latents, output)\n",
618
+ "\n",
619
+ " # # 6. Decode\n",
620
+ " # print(\"Step 5: Decoding to image...\")\n",
621
+ " # images = decoder(latents.astype(np.float32), signature_name=\"decode\")\n",
622
+ " # images = util.rescale(images, (-1, 1), (0, 255), clamp=True)\n",
623
+ " # images = util.move_channel(images, to=\"last\")\n",
624
+ "\n",
625
+ " # result_image = Image.fromarray(images[0].astype(np.uint8))\n",
626
+ " # result_image.save(OUTPUT_PATH)\n",
627
+ "\n",
628
+ " # total_time = time.time() - start_time\n",
629
+ " # print(f\"\\nDone! Total time: {total_time:.1f}s\")\n",
630
+ " # print(f\"Image saved to: {OUTPUT_PATH}\")\n",
631
+ " # result_image"
632
+ ]
633
+ },
634
+ {
635
+ "cell_type": "code",
636
+ "execution_count": 10,
637
+ "id": "5f173a79-5fe0-47d9-ba16-502fb3ce1ddd",
638
+ "metadata": {},
639
+ "outputs": [
640
+ {
641
+ "name": "stdout",
642
+ "output_type": "stream",
643
+ "text": [
644
+ "Loading models...\n",
645
+ " clip loaded\n",
646
+ " open_clip loaded\n",
647
+ " diffusion loaded\n",
648
+ " decoder loaded\n",
649
+ "All models loaded!\n"
650
+ ]
651
+ }
652
+ ],
653
+ "source": [
654
+ " from ai_edge_litert import interpreter as interpreter_lib\n",
655
+ " import numpy as np\n",
656
+ "\n",
657
+ " MODEL_DIR = \"/workspace/sdxl_tflite_aot_int4_blockwise_32\"\n",
658
+ " num_threads = 32\n",
659
+ "\n",
660
+ " def load_model(path):\n",
661
+ " return interpreter_lib.Interpreter(\n",
662
+ " model_path=path,\n",
663
+ " num_threads=num_threads,\n",
664
+ " experimental_op_resolver_type=interpreter_lib.OpResolverType.BUILTIN_WITHOUT_DEFAULT_DELEGATES,\n",
665
+ " )\n",
666
+ "\n",
667
+ " def run_model(interp, inputs, signature_name):\n",
668
+ " runner = interp.get_signature_runner(signature_name)\n",
669
+ " # input key 이름 확인\n",
670
+ " input_details = runner.get_input_details()\n",
671
+ " output_details = runner.get_output_details()\n",
672
+ "\n",
673
+ " feed = {}\n",
674
+ " input_keys = list(input_details.keys())\n",
675
+ " for i, key in enumerate(input_keys):\n",
676
+ " feed[key] = inputs[i] if isinstance(inputs, (list, tuple)) else inputs\n",
677
+ "\n",
678
+ " result = runner(**feed)\n",
679
+ " output_keys = list(output_details.keys())\n",
680
+ " if len(output_keys) == 1:\n",
681
+ " return result[output_keys[0]]\n",
682
+ " return tuple(result[k] for k in output_keys)\n",
683
+ "\n",
684
+ " # 모델 로드\n",
685
+ " print(\"Loading models...\")\n",
686
+ " clip_interp = load_model(f\"{MODEL_DIR}/clip_int4.tflite\")\n",
687
+ " clip_interp.allocate_tensors()\n",
688
+ " print(\" clip loaded\")\n",
689
+ "\n",
690
+ " open_clip_interp = load_model(f\"{MODEL_DIR}/open_clip_int4.tflite\")\n",
691
+ " open_clip_interp.allocate_tensors()\n",
692
+ " print(\" open_clip loaded\")\n",
693
+ "\n",
694
+ " diffusion_interp = load_model(f\"{MODEL_DIR}/diffusion_int4.tflite\")\n",
695
+ " diffusion_interp.allocate_tensors()\n",
696
+ " print(\" diffusion loaded\")\n",
697
+ "\n",
698
+ " decoder_interp = load_model(f\"{MODEL_DIR}/decoder_int4.tflite\")\n",
699
+ " decoder_interp.allocate_tensors()\n",
700
+ " print(\" decoder loaded\")\n",
701
+ "\n",
702
+ " print(\"All models loaded!\")"
703
+ ]
704
+ },
705
+ {
706
+ "cell_type": "code",
707
+ "execution_count": 11,
708
+ "id": "be818314-4a91-48c9-a0db-3d4aee94e10b",
709
+ "metadata": {},
710
+ "outputs": [
711
+ {
712
+ "name": "stdout",
713
+ "output_type": "stream",
714
+ "text": [
715
+ "\n",
716
+ "=== clip ===\n",
717
+ " signature: encode\n",
718
+ " inputs: [('args_0', array([ 1, 77], dtype=int32), <class 'numpy.int32'>)]\n",
719
+ " outputs: [('output_0', array([ 1, 77, 768], dtype=int32), <class 'numpy.float32'>)]\n",
720
+ "\n",
721
+ "=== open_clip ===\n",
722
+ " signature: encode\n",
723
+ " inputs: [('args_0', array([ 1, 77], dtype=int32), <class 'numpy.int32'>)]\n",
724
+ " outputs: [('output_0', array([ 1, 77, 1280], dtype=int32), <class 'numpy.float32'>), ('output_1', array([ 1, 77, 1280], dtype=int32), <class 'numpy.float32'>)]\n",
725
+ "\n",
726
+ "=== diffusion ===\n",
727
+ " signature: diffusion\n",
728
+ " inputs: [('args_0', array([ 2, 4, 128, 128], dtype=int32), <class 'numpy.float32'>), ('args_1', array([ 2, 77, 2048], dtype=int32), <class 'numpy.float32'>), ('args_2', array([ 1, 320], dtype=int32), <class 'numpy.float32'>), ('args_3', array([ 2, 2816], dtype=int32), <class 'numpy.float32'>)]\n",
729
+ " outputs: [('output_0', array([ 2, 4, 128, 128], dtype=int32), <class 'numpy.float32'>)]\n",
730
+ "\n",
731
+ "=== decoder ===\n",
732
+ " signature: decode\n",
733
+ " inputs: [('args_0', array([ 1, 4, 128, 128], dtype=int32), <class 'numpy.float32'>)]\n",
734
+ " outputs: [('output_0', array([ 1, 3, 1024, 1024], dtype=int32), <class 'numpy.float32'>)]\n"
735
+ ]
736
+ }
737
+ ],
738
+ "source": [
739
+ " for name, interp in [(\"clip\", clip_interp), (\"open_clip\", open_clip_interp),\n",
740
+ " (\"diffusion\", diffusion_interp), (\"decoder\", decoder_interp)]:\n",
741
+ " sigs = interp.get_signature_list()\n",
742
+ " print(f\"\\n=== {name} ===\")\n",
743
+ " for sig_name in sigs:\n",
744
+ " runner = interp.get_signature_runner(sig_name)\n",
745
+ " inputs = runner.get_input_details()\n",
746
+ " outputs = runner.get_output_details()\n",
747
+ " print(f\" signature: {sig_name}\")\n",
748
+ " print(f\" inputs: {[(k, v['shape'], v['dtype']) for k, v in inputs.items()]}\")\n",
749
+ " print(f\" outputs: {[(k, v['shape'], v['dtype']) for k, v in outputs.items()]}\")"
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": null,
755
+ "id": "1cf4bb8d-19d3-4a66-bbcf-f1f135bf9944",
756
+ "metadata": {},
757
+ "outputs": [
758
+ {
759
+ "name": "stdout",
760
+ "output_type": "stream",
761
+ "text": [
762
+ "Step 1: Dual text encoding...\n",
763
+ "Step 2: Computing add_embedding...\n",
764
+ "Step 3: Initializing latents...\n",
765
+ "Step 4: Diffusion (20 steps)...\n"
766
+ ]
767
+ },
768
+ {
769
+ "data": {
770
+ "application/vnd.jupyter.widget-view+json": {
771
+ "model_id": "e7a7f1c5949a4ea1bbb200c17f624850",
772
+ "version_major": 2,
773
+ "version_minor": 0
774
+ },
775
+ "text/plain": [
776
+ "Denoising: 0%| | 0/20 [00:00<?, ?it/s]"
777
+ ]
778
+ },
779
+ "metadata": {},
780
+ "output_type": "display_data"
781
+ }
782
+ ],
783
+ "source": [
784
+ " import os\n",
785
+ " import time\n",
786
+ " import numpy as np\n",
787
+ " import torch\n",
788
+ " from PIL import Image\n",
789
+ " from tqdm import auto as tqdm\n",
790
+ " from litert_torch.generative.examples.stable_diffusion import samplers\n",
791
+ " from litert_torch.generative.examples.stable_diffusion import tokenizer\n",
792
+ " from litert_torch.generative.examples.stable_diffusion_xl import util\n",
793
+ "\n",
794
+ " # ============================================================\n",
795
+ " # 설정\n",
796
+ " # ============================================================\n",
797
+ " SRC_DIR = \"/workspace/sdxl_tflite\"\n",
798
+ " TOKENIZER_DIR = os.path.join(os.path.expanduser(\"~\"), \"stable-diffusion-xl/tokenizer\")\n",
799
+ " OUTPUT_PATH = \"/workspace/sdxl_int4_generated.png\"\n",
800
+ "\n",
801
+ " text_projection = np.load(f\"{SRC_DIR}/text_projection.npy\")\n",
802
+ " tok = tokenizer.Tokenizer(TOKENIZER_DIR)\n",
803
+ "\n",
804
+ " PROMPT = \"a photograph of an astronaut riding a horse on the moon, high quality, detailed\"\n",
805
+ " NEGATIVE_PROMPT = \"blurry, low quality\"\n",
806
+ " CFG_SCALE = 7.5\n",
807
+ " HEIGHT, WIDTH = 1024, 1024\n",
808
+ " N_STEPS = 20\n",
809
+ " SEED = 42\n",
810
+ "\n",
811
+ " # ============================================================\n",
812
+ " # 헬퍼\n",
813
+ " # ============================================================\n",
814
+ " def run_signature(interp, sig_name, *args):\n",
815
+ " runner = interp.get_signature_runner(sig_name)\n",
816
+ " input_details = runner.get_input_details()\n",
817
+ " input_keys = list(input_details.keys())\n",
818
+ " feed = {input_keys[i]: args[i] for i in range(len(args))}\n",
819
+ " result = runner(**feed)\n",
820
+ " output_keys = list(result.keys())\n",
821
+ " if len(output_keys) == 1:\n",
822
+ " return result[output_keys[0]]\n",
823
+ " return tuple(result[k] for k in output_keys)\n",
824
+ "\n",
825
+ " # ============================================================\n",
826
+ " # 생성\n",
827
+ " # ============================================================\n",
828
+ " np.random.seed(SEED)\n",
829
+ " start_time = time.time()\n",
830
+ "\n",
831
+ " sampler = samplers.KEulerSampler(n_inference_steps=N_STEPS)\n",
832
+ "\n",
833
+ " # 1. Text encoding\n",
834
+ " print(\"Step 1: Dual text encoding...\")\n",
835
+ " cond_tokens = np.array(tok.encode(PROMPT)).astype(np.int32)\n",
836
+ " uncond_tokens = np.array(tok.encode(NEGATIVE_PROMPT)).astype(np.int32)\n",
837
+ "\n",
838
+ " cond_clip = run_signature(clip_interp, \"encode\", cond_tokens)\n",
839
+ " uncond_clip = run_signature(clip_interp, \"encode\", uncond_tokens)\n",
840
+ "\n",
841
+ " cond_open_hidden, cond_final = run_signature(open_clip_interp, \"encode\", cond_tokens)\n",
842
+ " uncond_open_hidden, uncond_final = run_signature(open_clip_interp, \"encode\", uncond_tokens)\n",
843
+ "\n",
844
+ " # EOS pooling + text_projection\n",
845
+ " cond_eos_idx = int(np.argmax(cond_tokens))\n",
846
+ " uncond_eos_idx = int(np.argmax(uncond_tokens))\n",
847
+ " cond_pooled = (cond_final[0, cond_eos_idx] @ text_projection)[np.newaxis, :]\n",
848
+ " uncond_pooled = (uncond_final[0, uncond_eos_idx] @ text_projection)[np.newaxis, :]\n",
849
+ "\n",
850
+ " cond_context = np.concatenate([cond_clip, cond_open_hidden], axis=-1)\n",
851
+ " uncond_context = np.concatenate([uncond_clip, uncond_open_hidden], axis=-1)\n",
852
+ " context = np.concatenate([cond_context, uncond_context], axis=0)\n",
853
+ "\n",
854
+ " # 2. Add embedding\n",
855
+ " print(\"Step 2: Computing add_embedding...\")\n",
856
+ " add_time_ids = util.get_add_time_ids(\n",
857
+ " original_size=(HEIGHT, WIDTH), crop_coords=(0, 0), target_size=(HEIGHT, WIDTH),\n",
858
+ " )\n",
859
+ " time_ids_emb = util.encode_add_time_ids(add_time_ids).numpy()\n",
860
+ " cond_add_emb = np.concatenate([cond_pooled, time_ids_emb], axis=-1)\n",
861
+ " uncond_add_emb = np.concatenate([uncond_pooled, time_ids_emb], axis=-1)\n",
862
+ " add_emb = np.concatenate([cond_add_emb, uncond_add_emb], axis=0).astype(np.float32)\n",
863
+ "\n",
864
+ " # 3. Initialize latents\n",
865
+ " print(\"Step 3: Initializing latents...\")\n",
866
+ " latents = np.random.normal(size=(1, 4, HEIGHT // 8, WIDTH // 8)).astype(np.float32)\n",
867
+ " latents *= sampler.initial_scale\n",
868
+ "\n",
869
+ " # 4. Diffusion loop\n",
870
+ " print(f\"Step 4: Diffusion ({N_STEPS} steps)...\")\n",
871
+ " for i, timestep in enumerate(tqdm.tqdm(sampler.timesteps, desc=\"Denoising\")):\n",
872
+ " time_embedding = util.get_time_embedding(timestep).numpy()\n",
873
+ " input_latents = latents * sampler.get_input_scale()\n",
874
+ " input_latents = input_latents.repeat(2, axis=0)\n",
875
+ "\n",
876
+ " output = run_signature(\n",
877
+ " diffusion_interp, \"diffusion\",\n",
878
+ " input_latents.astype(np.float32),\n",
879
+ " context.astype(np.float32),\n",
880
+ " time_embedding.astype(np.float32),\n",
881
+ " add_emb,\n",
882
+ " )\n",
883
+ " output_cond, output_uncond = np.split(output, 2, axis=0)\n",
884
+ " output = CFG_SCALE * (output_cond - output_uncond) + output_uncond\n",
885
+ " latents = sampler.step(latents, output)\n",
886
+ "\n",
887
+ " # 5. Decode\n",
888
+ " print(\"Step 5: Decoding to image...\")\n",
889
+ " images = run_signature(decoder_interp, \"decode\", latents.astype(np.float32))\n",
890
+ " images = util.rescale(images, (-1, 1), (0, 255), clamp=True)\n",
891
+ " images = util.move_channel(images, to=\"last\")\n",
892
+ "\n",
893
+ " result_image = Image.fromarray(images[0].astype(np.uint8))\n",
894
+ " result_image.save(OUTPUT_PATH)\n",
895
+ "\n",
896
+ " total_time = time.time() - start_time\n",
897
+ " print(f\"\\nDone! Total time: {total_time:.1f}s\")\n",
898
+ " print(f\"Image saved to: {OUTPUT_PATH}\")\n",
899
+ " result_image"
900
+ ]
901
+ },
902
+ {
903
+ "cell_type": "markdown",
904
+ "id": "3936e5c5-ceeb-4cc3-819c-702a38a89bf0",
905
+ "metadata": {},
906
+ "source": [
907
+ "# AOT Conversion Code\n",
908
+ "\n",
909
+ "위는 완성된 코드, 아래는 테스트용 잡다한거"
910
+ ]
911
+ },
912
+ {
913
+ "cell_type": "code",
914
+ "execution_count": 2,
915
+ "id": "a87c3a83-7811-465e-9079-bced67cbb82e",
916
+ "metadata": {},
917
+ "outputs": [
918
+ {
919
+ "name": "stdout",
920
+ "output_type": "stream",
921
+ "text": [
922
+ "['Quantizer', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'algorithm_manager', 'algorithm_manager_api', 'algorithms', 'calibrator', 'default_policy', 'model_modifier', 'model_validator', 'params_generator', 'qtyping', 'quantizer', 'recipe', 'recipe_manager', 'transformation_instruction_generator', 'transformation_performer', 'transformations', 'utils']\n",
923
+ "['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__']\n"
924
+ ]
925
+ }
926
+ ],
927
+ "source": [
928
+ " # ai_edge_quantizer에 AieQuantizerT 구현체가 있는지 확인\n",
929
+ " import ai_edge_quantizer\n",
930
+ " print(dir(ai_edge_quantizer))\n",
931
+ "\n",
932
+ " # 혹시 ai_edge_litert.aot 쪽에 있는지도 확인\n",
933
+ " import ai_edge_litert.aot\n",
934
+ " print(dir(ai_edge_litert.aot))\n",
935
+ "\n",
936
+ " # components에서 import 가능한 quantizer 구현체 확인\n",
937
+ " from ai_edge_litert.aot.core import components\n",
938
+ " import inspect\n",
939
+ " for name, obj in inspect.getmembers(components):\n",
940
+ " if inspect.isclass(obj) and issubclass(obj, components.AieQuantizerT) and obj != components.AieQuantizerT:\n",
941
+ " print(f\"Found: {name}\")"
942
+ ]
943
+ },
944
+ {
945
+ "cell_type": "code",
946
+ "execution_count": 4,
947
+ "id": "b7f7c046-e555-417d-b877-60cf9845d2dc",
948
+ "metadata": {},
949
+ "outputs": [
950
+ {
951
+ "name": "stderr",
952
+ "output_type": "stream",
953
+ "text": [
954
+ "/usr/local/lib/python3.12/dist-packages/torch/distributed/distributed_c10d.py:351: UserWarning: Device capability of jax unspecified, assuming `cpu` and `cuda`. Please specify it via the `devices` argument of `register_backend`.\n",
955
+ " warnings.warn(\n",
956
+ "An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n",
957
+ "Skipping import of cpp extensions due to incompatible torch version 2.8.0+cu128 for torchao version 0.16.0 Please see https://github.com/pytorch/ao/issues/2919 for more info\n"
958
+ ]
959
+ },
960
+ {
961
+ "name": "stdout",
962
+ "output_type": "stream",
963
+ "text": [
964
+ "INT4 BLOCKWISE_32 verify passed: QuantConfig(pt2e_quantizer=None, generative_recipe=GenerativeQuantRecipe(\n",
965
+ " Default: (a:FP32, w:INT4, DYNAMIC_RANGE, MIN_MAX, BLOCKWISE_32, )\n",
966
+ " Embedding: None\n",
967
+ " Attention: None\n",
968
+ " Feedforward: None\n",
969
+ "), _quantizer_mode=<_QuantizerMode.AI_EDGE_QUANTIZER: 4>)\n"
970
+ ]
971
+ }
972
+ ],
973
+ "source": [
974
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
975
+ "\n",
976
+ " # INT4 BLOCKWISE_32 (DYNAMIC_RANGE 모드)\n",
977
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
978
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
979
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
980
+ " )\n",
981
+ " print(\"INT4 BLOCKWISE_32 verify passed:\", quant_config)\n"
982
+ ]
983
+ },
984
+ {
985
+ "cell_type": "code",
986
+ "execution_count": 5,
987
+ "id": "e8c3a18d-09fa-4c3f-8e92-83937f9a97de",
988
+ "metadata": {},
989
+ "outputs": [
990
+ {
991
+ "name": "stdout",
992
+ "output_type": "stream",
993
+ "text": [
994
+ "QuantConfig(pt2e_quantizer=None, generative_recipe=GenerativeQuantRecipe(\n",
995
+ " Default: (a:FP32, w:INT4, DYNAMIC_RANGE, MIN_MAX, BLOCKWISE_32, )\n",
996
+ " Embedding: None\n",
997
+ " Attention: None\n",
998
+ " Feedforward: None\n",
999
+ "), _quantizer_mode=<_QuantizerMode.AI_EDGE_QUANTIZER: 4>)\n"
1000
+ ]
1001
+ }
1002
+ ],
1003
+ "source": [
1004
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
1005
+ "\n",
1006
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
1007
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
1008
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
1009
+ " )\n",
1010
+ " print(quant_config)"
1011
+ ]
1012
+ },
1013
+ {
1014
+ "cell_type": "code",
1015
+ "execution_count": 6,
1016
+ "id": "4619e935-a84e-4c01-ac96-ede6988f7e52",
1017
+ "metadata": {},
1018
+ "outputs": [
1019
+ {
1020
+ "ename": "NameError",
1021
+ "evalue": "name 'clip_loaded' is not defined",
1022
+ "output_type": "error",
1023
+ "traceback": [
1024
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
1025
+ "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
1026
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 19\u001b[39m\n\u001b[32m 14\u001b[39m os.makedirs(TEST_DIR, exist_ok=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m 16\u001b[39m prompt_tokens = torch.full((\u001b[32m1\u001b[39m, \u001b[32m77\u001b[39m), \u001b[32m0\u001b[39m, dtype=torch.int) \u001b[38;5;66;03m# N_TOKENS=77\u001b[39;00m\n\u001b[32m 18\u001b[39m litert_torch.signature(\n\u001b[32m---> \u001b[39m\u001b[32m19\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mencode\u001b[39m\u001b[33m\"\u001b[39m, \u001b[43mclip_loaded\u001b[49m, (prompt_tokens,)\n\u001b[32m 20\u001b[39m ).convert(quant_config=quant_config).export(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mTEST_DIR\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/clip_int4.tflite\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 21\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mINT4 clip conversion done\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 23\u001b[39m \u001b[38;5;66;03m# AOT 컴파일 테스트\u001b[39;00m\n",
1027
+ "\u001b[31mNameError\u001b[39m: name 'clip_loaded' is not defined"
1028
+ ]
1029
+ }
1030
+ ],
1031
+ "source": [
1032
+ " import os\n",
1033
+ " import torch\n",
1034
+ " from litert_torch.generative.quantize import quant_attrs, quant_recipes\n",
1035
+ " import litert_torch\n",
1036
+ "\n",
1037
+ " # INT4 BLOCKWISE_32 config\n",
1038
+ " quant_config = quant_recipes.full_dynamic_recipe(\n",
1039
+ " weight_dtype=quant_attrs.Dtype.INT4,\n",
1040
+ " granularity=quant_attrs.Granularity.BLOCKWISE_32,\n",
1041
+ " )\n",
1042
+ "\n",
1043
+ " # clip 모델만 변환 (가장 가벼움)\n",
1044
+ " TEST_DIR = \"/tmp/sdxl_int4_test\"\n",
1045
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
1046
+ "\n",
1047
+ " prompt_tokens = torch.full((1, 77), 0, dtype=torch.int) # N_TOKENS=77\n",
1048
+ "\n",
1049
+ " litert_torch.signature(\n",
1050
+ " \"encode\", clip_loaded, (prompt_tokens,)\n",
1051
+ " ).convert(quant_config=quant_config).export(f\"{TEST_DIR}/clip_int4.tflite\")\n",
1052
+ " print(\"INT4 clip conversion done\")\n",
1053
+ "\n",
1054
+ " # AOT 컴파일 테스트\n",
1055
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
1056
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
1057
+ "\n",
1058
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
1059
+ " result = aot_lib.aot_compile(\n",
1060
+ " f\"{TEST_DIR}/clip_int4.tflite\",\n",
1061
+ " target=[sm8850_target],\n",
1062
+ " keep_going=True,\n",
1063
+ " )\n",
1064
+ "\n",
1065
+ " print(f\"Success: {len(result.models_with_backend)}\")\n",
1066
+ " print(f\"Failed: {len(result.failed_backends)}\")\n",
1067
+ " for backend, error in result.failed_backends:\n",
1068
+ " print(f\" {backend.target}: {error[:200]}\")"
1069
+ ]
1070
+ },
1071
+ {
1072
+ "cell_type": "code",
1073
+ "execution_count": 7,
1074
+ "id": "429c16cb-c8bf-4d66-ae08-e7fb0b35d0b3",
1075
+ "metadata": {},
1076
+ "outputs": [
1077
+ {
1078
+ "name": "stdout",
1079
+ "output_type": "stream",
1080
+ "text": [
1081
+ "/tmp/sdxl_tflite: ['clip.tflite', 'open_clip.tflite', 'diffusion.tflite', 'decoder.tflite', 'text_projection.npy', 'generated_image.jpg', 'playground_00.png', 'playground_01.png', 'playground_02.png', 'playground_03.png', 'playground_grid.png', 'cfg_2.0.png', 'cfg_7.5.png', 'cfg_15.0.png', 'cfg_comparison.png', 'sampler_k_euler.png', 'sampler_k_euler_ancestral.png', 'sampler_k_lms.png', 'sampler_comparison.png']\n",
1082
+ "/workspace/sdxl_tflite: ['open_clip.tflite', 'diffusion.tflite', 'decoder.tflite', 'text_projection.npy', 'generated_image.jpg', 'playground_00.png', 'playground_01.png', 'playground_02.png', 'playground_03.png', 'playground_grid.png', 'cfg_2.0.png', 'cfg_7.5.png', 'cfg_15.0.png', 'cfg_comparison.png', 'sampler_k_euler.png', 'sampler_k_euler_ancestral.png', 'sampler_k_lms.png', 'sampler_comparison.png', 'clip.tflite', '.ipynb_checkpoints']\n"
1083
+ ]
1084
+ }
1085
+ ],
1086
+ "source": [
1087
+ " import os\n",
1088
+ " # 이전 변환 결과가 남아있는지 확인\n",
1089
+ " for d in [\"/tmp/sdxl_tflite\", \"/tmp/sdxl_tflite_quantized\", \"/workspace/sdxl_tflite\"]:\n",
1090
+ " if os.path.exists(d):\n",
1091
+ " files = os.listdir(d)\n",
1092
+ " print(f\"{d}: {files}\")"
1093
+ ]
1094
+ },
1095
+ {
1096
+ "cell_type": "code",
1097
+ "execution_count": 9,
1098
+ "id": "70087fe5-d3e8-4bf2-ba1c-92c128d88a0c",
1099
+ "metadata": {},
1100
+ "outputs": [
1101
+ {
1102
+ "name": "stderr",
1103
+ "output_type": "stream",
1104
+ "text": [
1105
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
1106
+ " ret = np.divide(tensor_data, scales)\n",
1107
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
1108
+ " return tensor.astype(qtype)\n"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "name": "stdout",
1113
+ "output_type": "stream",
1114
+ "text": [
1115
+ "INT4 blockwise clip: 66.8 MB\n"
1116
+ ]
1117
+ },
1118
+ {
1119
+ "data": {
1120
+ "application/vnd.jupyter.widget-view+json": {
1121
+ "model_id": "27fdf6c884d9490f8909bf33a088a1c4",
1122
+ "version_major": 2,
1123
+ "version_minor": 0
1124
+ },
1125
+ "text/plain": [
1126
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
1127
+ ]
1128
+ },
1129
+ "metadata": {},
1130
+ "output_type": "display_data"
1131
+ },
1132
+ {
1133
+ "name": "stdout",
1134
+ "output_type": "stream",
1135
+ "text": [
1136
+ "Success: 1\n",
1137
+ "Failed: 0\n"
1138
+ ]
1139
+ }
1140
+ ],
1141
+ "source": [
1142
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
1143
+ " from ai_edge_quantizer import recipe as aie_recipe\n",
1144
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
1145
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
1146
+ " import os\n",
1147
+ "\n",
1148
+ " # 1. FP32 clip을 INT4 blockwise로 양자화\n",
1149
+ " SRC = \"/tmp/sdxl_tflite/clip.tflite\"\n",
1150
+ " TEST_DIR = \"/tmp/sdxl_int4_test\"\n",
1151
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
1152
+ "\n",
1153
+ " qt.update_quantization_recipe(\n",
1154
+ " regex=\".*\",\n",
1155
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1156
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1157
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1158
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1159
+ " num_bits=4,\n",
1160
+ " symmetric=True,\n",
1161
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1162
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1163
+ " ),\n",
1164
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1165
+ " explicit_dequantize=False,\n",
1166
+ " ),\n",
1167
+ " )\n",
1168
+ " result = qt.quantize()\n",
1169
+ " int4_path = f\"{TEST_DIR}/clip_int4.tflite\"\n",
1170
+ " with open(int4_path, \"wb\") as f:\n",
1171
+ " f.write(result.quantized_model)\n",
1172
+ " print(f\"INT4 blockwise clip: {os.path.getsize(int4_path) / 1024 / 1024:.1f} MB\")\n",
1173
+ "\n",
1174
+ " # AOT 컴파일 테스트\n",
1175
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
1176
+ " aot_result = aot_lib.aot_compile(\n",
1177
+ " int4_path,\n",
1178
+ " target=[sm8850_target],\n",
1179
+ " keep_going=True,\n",
1180
+ " )\n",
1181
+ " print(f\"Success: {len(aot_result.models_with_backend)}\")\n",
1182
+ " print(f\"Failed: {len(aot_result.failed_backends)}\")\n",
1183
+ " for backend, error in aot_result.failed_backends:\n",
1184
+ " print(f\" Error: {error[:300]}\")"
1185
+ ]
1186
+ },
1187
+ {
1188
+ "cell_type": "code",
1189
+ "execution_count": null,
1190
+ "id": "c9974fa3-8be4-4110-9194-892d05219ec7",
1191
+ "metadata": {},
1192
+ "outputs": [],
1193
+ "source": [
1194
+ " COMPONENTS = [\"open_clip\", \"decoder\", \"diffusion\"]\n",
1195
+ " SRC_DIR = \"/tmp/sdxl_tflite\"\n",
1196
+ "\n",
1197
+ " for name in COMPONENTS:\n",
1198
+ " src = f\"{SRC_DIR}/{name}.tflite\"\n",
1199
+ " print(f\"\\n=== {name} ===\")\n",
1200
+ "\n",
1201
+ " qt = aie_quantizer.Quantizer(src)\n",
1202
+ " qt.update_quantization_recipe(\n",
1203
+ " regex=\".*\",\n",
1204
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1205
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1206
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1207
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1208
+ " num_bits=4,\n",
1209
+ " symmetric=True,\n",
1210
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1211
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1212
+ " ),\n",
1213
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1214
+ " explicit_dequantize=False,\n",
1215
+ " ),\n",
1216
+ " )\n",
1217
+ " result = qt.quantize()\n",
1218
+ " out_path = f\"{TEST_DIR}/{name}_int4.tflite\"\n",
1219
+ " with open(out_path, \"wb\") as f:\n",
1220
+ " f.write(result.quantized_model)\n",
1221
+ "\n",
1222
+ " orig_mb = os.path.getsize(src) / 1024 / 1024\n",
1223
+ " quant_mb = os.path.getsize(out_path) / 1024 / 1024\n",
1224
+ " print(f\" Size: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
1225
+ "\n",
1226
+ " aot_result = aot_lib.aot_compile(\n",
1227
+ " out_path,\n",
1228
+ " target=[sm8850_target],\n",
1229
+ " keep_going=True,\n",
1230
+ " )\n",
1231
+ " print(f\" AOT: Success={len(aot_result.models_with_backend)}, Failed={len(aot_result.failed_backends)}\")\n",
1232
+ " for backend, error in aot_result.failed_backends:\n",
1233
+ " print(f\" Error: {error[:300]}\")"
1234
+ ]
1235
+ },
1236
+ {
1237
+ "cell_type": "code",
1238
+ "execution_count": 10,
1239
+ "id": "69c9286b-c246-438b-ac94-d5d7774de151",
1240
+ "metadata": {},
1241
+ "outputs": [
1242
+ {
1243
+ "name": "stdout",
1244
+ "output_type": "stream",
1245
+ "text": [
1246
+ "Quantize: 469.6 MB -> 469.6 MB (100%)\n"
1247
+ ]
1248
+ },
1249
+ {
1250
+ "data": {
1251
+ "application/vnd.jupyter.widget-view+json": {
1252
+ "model_id": "7f78061d12674ad286882e3856f4e638",
1253
+ "version_major": 2,
1254
+ "version_minor": 0
1255
+ },
1256
+ "text/plain": [
1257
+ "Backend: 0%| | 0/1 [00:00<?, ?it/s]"
1258
+ ]
1259
+ },
1260
+ "metadata": {},
1261
+ "output_type": "display_data"
1262
+ },
1263
+ {
1264
+ "name": "stdout",
1265
+ "output_type": "stream",
1266
+ "text": [
1267
+ "AOT: Success=1, Failed=0\n"
1268
+ ]
1269
+ }
1270
+ ],
1271
+ "source": [
1272
+ " import os\n",
1273
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
1274
+ " from ai_edge_litert.aot import aot_compile as aot_lib\n",
1275
+ " from ai_edge_litert.aot.vendors.qualcomm import target as qnn_target\n",
1276
+ "\n",
1277
+ " SRC = \"/workspace/sdxl_tflite/clip.tflite\"\n",
1278
+ " TEST_DIR = \"/tmp/sdxl_int8_blockwise_test\"\n",
1279
+ " os.makedirs(TEST_DIR, exist_ok=True)\n",
1280
+ "\n",
1281
+ " # INT8 BLOCKWISE_32\n",
1282
+ " qt = aie_quantizer.Quantizer(SRC)\n",
1283
+ " qt.update_quantization_recipe(\n",
1284
+ " regex=\".*\",\n",
1285
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1286
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1287
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1288
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1289
+ " num_bits=8,\n",
1290
+ " symmetric=True,\n",
1291
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1292
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1293
+ " ),\n",
1294
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1295
+ " explicit_dequantize=False,\n",
1296
+ " ),\n",
1297
+ " )\n",
1298
+ " result = qt.quantize()\n",
1299
+ " out_path = f\"{TEST_DIR}/clip_int8_bw32.tflite\"\n",
1300
+ " with open(out_path, \"wb\") as f:\n",
1301
+ " f.write(result.quantized_model)\n",
1302
+ "\n",
1303
+ " orig_mb = os.path.getsize(SRC) / 1024 / 1024\n",
1304
+ " quant_mb = os.path.getsize(out_path) / 1024 / 1024\n",
1305
+ " print(f\"Quantize: {orig_mb:.1f} MB -> {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)\")\n",
1306
+ "\n",
1307
+ " # AOT\n",
1308
+ " sm8850_target = qnn_target.Target(qnn_target.SocModel.SM8850)\n",
1309
+ " aot_result = aot_lib.aot_compile(\n",
1310
+ " out_path,\n",
1311
+ " target=[sm8850_target],\n",
1312
+ " keep_going=True,\n",
1313
+ " )\n",
1314
+ " print(f\"AOT: Success={len(aot_result.models_with_backend)}, Failed={len(aot_result.failed_backends)}\")\n",
1315
+ " for backend, error in aot_result.failed_backends:\n",
1316
+ " print(f\"Error: {error[:300]}\")"
1317
+ ]
1318
+ },
1319
+ {
1320
+ "cell_type": "code",
1321
+ "execution_count": null,
1322
+ "id": "34754c5d-7180-4f3a-9171-de27988a28d9",
1323
+ "metadata": {},
1324
+ "outputs": [
1325
+ {
1326
+ "name": "stderr",
1327
+ "output_type": "stream",
1328
+ "text": [
1329
+ "/usr/local/lib/python3.12/dist-packages/tensorflow/lite/python/interpreter.py:457: UserWarning: Warning: tf.lite.Interpreter is deprecated and is scheduled for deletion in\n",
1330
+ " TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.\n",
1331
+ " See the [migration guide](https://ai.google.dev/edge/litert/migration)\n",
1332
+ " for details.\n",
1333
+ " \n",
1334
+ " warnings.warn(_INTERPRETER_DELETION_WARNING)\n"
1335
+ ]
1336
+ }
1337
+ ],
1338
+ "source": [
1339
+ " # 양자화 전후 모델 내부 확인\n",
1340
+ " import tensorflow as tf\n",
1341
+ "\n",
1342
+ " orig = tf.lite.Interpreter(model_path=\"/workspace/sdxl_tflite/clip.tflite\")\n",
1343
+ " orig.allocate_tensors()\n",
1344
+ "\n",
1345
+ " quant = tf.lite.Interpreter(model_path=\"/tmp/sdxl_int8_blockwise_test/clip_int8_bw32.tflite\")\n",
1346
+ " quant.allocate_tensors()\n",
1347
+ "\n",
1348
+ " # 가중치 텐서 dtype 비교\n",
1349
+ " print(\"=== Original ===\")\n",
1350
+ " for t in orig.get_tensor_details()[:5]:\n",
1351
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")\n",
1352
+ "\n",
1353
+ " print(\"\\n=== INT8 blockwise ===\")\n",
1354
+ " for t in quant.get_tensor_details()[:5]:\n",
1355
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")\n",
1356
+ "\n",
1357
+ " # INT4 것도 비교\n",
1358
+ " int4 = tf.lite.Interpreter(model_path=\"/tmp/sdxl_int4_test/clip_int4.tflite\")\n",
1359
+ " int4.allocate_tensors()\n",
1360
+ " print(\"\\n=== INT4 blockwise ===\")\n",
1361
+ " for t in int4.get_tensor_details()[:5]:\n",
1362
+ " print(f\" {t['name'][:50]:50s} dtype={t['dtype']}\")"
1363
+ ]
1364
+ },
1365
+ {
1366
+ "cell_type": "code",
1367
+ "execution_count": 2,
1368
+ "id": "98db8f3d-1099-468a-87b2-e7f60431b948",
1369
+ "metadata": {},
1370
+ "outputs": [
1371
+ {
1372
+ "name": "stdout",
1373
+ "output_type": "stream",
1374
+ "text": [
1375
+ "Original: 469.6 MB\n",
1376
+ "Quantized bytes: 469.6 MB\n"
1377
+ ]
1378
+ },
1379
+ {
1380
+ "name": "stderr",
1381
+ "output_type": "stream",
1382
+ "text": [
1383
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:311: RuntimeWarning: invalid value encountered in divide\n",
1384
+ " ret = np.divide(tensor_data, scales)\n",
1385
+ "/usr/local/lib/python3.12/dist-packages/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py:98: RuntimeWarning: invalid value encountered in cast\n",
1386
+ " return tensor.astype(qtype)\n"
1387
+ ]
1388
+ },
1389
+ {
1390
+ "name": "stdout",
1391
+ "output_type": "stream",
1392
+ "text": [
1393
+ "INT4 quantized bytes: 66.8 MB\n"
1394
+ ]
1395
+ }
1396
+ ],
1397
+ "source": [
1398
+ " import os\n",
1399
+ "\n",
1400
+ " # 양자화만 다시 해서 확인 (AOT 안 거침)\n",
1401
+ " from ai_edge_quantizer import quantizer as aie_quantizer\n",
1402
+ "\n",
1403
+ " SRC = \"/workspace/sdxl_tflite/clip.tflite\"\n",
1404
+ "\n",
1405
+ " # INT8 blockwise 양자화만\n",
1406
+ " qt = aie_quantizer.Quantizer(SRC)\n",
1407
+ " qt.update_quantization_recipe(\n",
1408
+ " regex=\".*\",\n",
1409
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1410
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1411
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1412
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1413
+ " num_bits=8,\n",
1414
+ " symmetric=True,\n",
1415
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1416
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1417
+ " ),\n",
1418
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1419
+ " explicit_dequantize=False,\n",
1420
+ " ),\n",
1421
+ " )\n",
1422
+ " result = qt.quantize()\n",
1423
+ "\n",
1424
+ " # 양자화 결과 통계 확인\n",
1425
+ " print(f\"Original: {os.path.getsize(SRC) / 1024 / 1024:.1f} MB\")\n",
1426
+ " print(f\"Quantized bytes: {len(result.quantized_model) / 1024 / 1024:.1f} MB\")\n",
1427
+ "\n",
1428
+ " # 양자화 로그 확인\n",
1429
+ " if hasattr(result, 'log'):\n",
1430
+ " print(f\"Log: {result.log}\")\n",
1431
+ " if hasattr(result, 'skipped_ops'):\n",
1432
+ " print(f\"Skipped: {result.skipped_ops}\")\n",
1433
+ "\n",
1434
+ " # 비교: INT4도 같은 방식으로 크기 확인\n",
1435
+ " qt4 = aie_quantizer.Quantizer(SRC)\n",
1436
+ " qt4.update_quantization_recipe(\n",
1437
+ " regex=\".*\",\n",
1438
+ " operation_name=aie_quantizer.qtyping.TFLOperationName.ALL_SUPPORTED,\n",
1439
+ " algorithm_key=\"min_max_uniform_quantize\",\n",
1440
+ " op_config=aie_quantizer.qtyping.OpQuantizationConfig(\n",
1441
+ " weight_tensor_config=aie_quantizer.qtyping.TensorQuantizationConfig(\n",
1442
+ " num_bits=4,\n",
1443
+ " symmetric=True,\n",
1444
+ " granularity=aie_quantizer.qtyping.QuantGranularity.BLOCKWISE_32,\n",
1445
+ " dtype=aie_quantizer.qtyping.TensorDataType.INT,\n",
1446
+ " ),\n",
1447
+ " compute_precision=aie_quantizer.qtyping.ComputePrecision.INTEGER,\n",
1448
+ " explicit_dequantize=False,\n",
1449
+ " ),\n",
1450
+ " )\n",
1451
+ " result4 = qt4.quantize()\n",
1452
+ " print(f\"INT4 quantized bytes: {len(result4.quantized_model) / 1024 / 1024:.1f} MB\")\n"
1453
+ ]
1454
+ },
1455
+ {
1456
+ "cell_type": "code",
1457
+ "execution_count": null,
1458
+ "id": "98d994de-58ff-4741-9521-f8d0823ff089",
1459
+ "metadata": {},
1460
+ "outputs": [],
1461
+ "source": []
1462
+ }
1463
+ ],
1464
+ "metadata": {
1465
+ "kernelspec": {
1466
+ "display_name": "Python 3 (ipykernel)",
1467
+ "language": "python",
1468
+ "name": "python3"
1469
+ },
1470
+ "language_info": {
1471
+ "codemirror_mode": {
1472
+ "name": "ipython",
1473
+ "version": 3
1474
+ },
1475
+ "file_extension": ".py",
1476
+ "mimetype": "text/x-python",
1477
+ "name": "python",
1478
+ "nbconvert_exporter": "python",
1479
+ "pygments_lexer": "ipython3",
1480
+ "version": "3.12.3"
1481
+ }
1482
+ },
1483
+ "nbformat": 4,
1484
+ "nbformat_minor": 5
1485
+ }
sdxl_porting_example.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
sdxl_tflite/cfg_15.0.png ADDED

Git LFS Details

  • SHA256: 86b78e3b8c7a22cc5b7866f47fdfa5ad24471a1b1a3cbf0f275f1d820ca0af3e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.91 MB
sdxl_tflite/cfg_2.0.png ADDED

Git LFS Details

  • SHA256: d16397bb98187cbe3be3a5a99bdc1f97a67e8902a3819aead090cabb8b260328
  • Pointer size: 132 Bytes
  • Size of remote file: 1.89 MB
sdxl_tflite/cfg_7.5.png ADDED

Git LFS Details

  • SHA256: aecf3d3f0fef0bdbe3d83b68f680ff9da24f1e59285b512e2f52364d5177de03
  • Pointer size: 132 Bytes
  • Size of remote file: 1.85 MB
sdxl_tflite/cfg_comparison.png ADDED

Git LFS Details

  • SHA256: 9780b2188638568b5dd096675b1aaa7c0953f5cd41eb5ca459d879d1587c2059
  • Pointer size: 132 Bytes
  • Size of remote file: 1.2 MB
sdxl_tflite/clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3573a0b57173b7753267a50a720dea04b7de6993ee3b279ce9d7935b2d603e3
3
+ size 492412148
sdxl_tflite/decoder.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20134f17c83dfb234a1121ffb30490aaaf4e199286da2e09673fe15a9a0a98ed
3
+ size 198431096
sdxl_tflite/generated_image.jpg ADDED

Git LFS Details

  • SHA256: 3ebf5caec793ef5d919f781e80fc874cc9ff4e36b08783fd3af73e4e1451939f
  • Pointer size: 131 Bytes
  • Size of remote file: 114 kB
sdxl_tflite/playground_00.png ADDED

Git LFS Details

  • SHA256: 6e30464558bc46d5970fc93d9c563f053d34aa01c239157bd4ebfffd226194ba
  • Pointer size: 132 Bytes
  • Size of remote file: 1.6 MB
sdxl_tflite/playground_01.png ADDED

Git LFS Details

  • SHA256: df7aedf498b10d2543f0ca59d484fa0fc720781f6a3abcd0f215467adc5434f6
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
sdxl_tflite/playground_02.png ADDED

Git LFS Details

  • SHA256: e526d321dab805b09554517f0d6777b6f0dc5c44e8a2a03df94aff29eb750f80
  • Pointer size: 132 Bytes
  • Size of remote file: 1.83 MB
sdxl_tflite/playground_03.png ADDED

Git LFS Details

  • SHA256: 0665b7413e48d08fc7b0edd1707cabe3e6bf200a6e7b1f0b7fe358b2dd714d12
  • Pointer size: 132 Bytes
  • Size of remote file: 1.93 MB
sdxl_tflite/playground_grid.png ADDED

Git LFS Details

  • SHA256: ed54dbbec18f8915a09b38c89e0ecade1422590f93701e7bd87a528935c8fa04
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
sdxl_tflite/sampler_comparison.png ADDED

Git LFS Details

  • SHA256: 617fb5177d33dee2314384f34034c4f0bf60f2b270ffbc9da219905796284f17
  • Pointer size: 132 Bytes
  • Size of remote file: 1.46 MB
sdxl_tflite/sampler_k_euler.png ADDED

Git LFS Details

  • SHA256: fe88101a37201f3e3bdd595f576b66cfe48ce209b1e996a5456f6e6cc66c5e1f
  • Pointer size: 132 Bytes
  • Size of remote file: 2.23 MB
sdxl_tflite/sampler_k_euler_ancestral.png ADDED

Git LFS Details

  • SHA256: e05ba0f1eb2aa7161deba099c18c3d3269ba98b5ea8840d5b41e7797e41172d9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.02 MB
sdxl_tflite/sampler_k_lms.png ADDED

Git LFS Details

  • SHA256: 0766438f2eeae2d588ad65023359f331fdd24891249d5d54e875aeaff2e6dfbb
  • Pointer size: 132 Bytes
  • Size of remote file: 2.16 MB
sdxl_tflite/text_projection.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c2977d1573888208415c96531f65558cdeb76c9a5e1bcc8a3d1df6a55022f8
3
+ size 6553728
sdxl_tflite_aot_int4_blockwise_32/_compiled_models/diffusion_int4_Qualcomm_SM8850_apply_plugin.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c467a24fc1ae7aa397e74f0480ba981bdebe4b94b5808cb6a220bc350816acf
3
+ size 2587049280
sdxl_tflite_fp16/_compiled_models/diffusion_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_fp16/clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57feae823b13c5040150bcd8fd3f73285462cdc0892106991aecf44cecbe084a
3
+ size 246676112
sdxl_tflite_fp16/decoder.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280cd22872a0f36416e5690e8a546fb91b33992e77fc6b65da0c5f8c4f11a57e
3
+ size 99544864
sdxl_tflite_fp16/diffusion.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5909a6b20f694751dfd05249327790892785aed03b653918bf28320557c2ad78
3
+ size 5144262816
sdxl_tflite_fp16/open_clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e459f77df8a0a8cc366a0e9b7cf00c3fa0ad3e6acb6ca91bd847c4cd2a3627
3
+ size 1387438216
sdxl_tflite_fp16/text_projection.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce2d9bc5c2b839e18fc4ab2a2f70e4f7840c9b6ff646f0881b0cd15e04c8c48d
3
+ size 6553600
sdxl_tflite_fp16/text_projection.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c2977d1573888208415c96531f65558cdeb76c9a5e1bcc8a3d1df6a55022f8
3
+ size 6553728
sdxl_tflite_quantized/_compiled_models/clip_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_quantized/_compiled_models/decoder_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_quantized/_compiled_models/diffusion_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_quantized/_compiled_models/open_clip_Qualcomm_SM8850_apply_plugin.tflite ADDED
File without changes
sdxl_tflite_quantized/clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2eb43f4edab9ca36792980b6a466ea5f1810e0fa15cdc301e930a19b65f4f3c
3
+ size 125385272
sdxl_tflite_quantized/decoder.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c953730ea9d3733664f63832dbff9c2ab782c7d7dc1362d66a2700bcf4fea9
3
+ size 50269912
sdxl_tflite_quantized/diffusion.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0acb90bca9400caa200d5d11aa557ff05ccf07d26825d2c4e66daf11b2bec931
3
+ size 2596327472
sdxl_tflite_quantized/open_clip.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d28b58774329f31487b76aee6bfde7870e5cf5d7b3111388649ff9d03444752
3
+ size 700069088