Upload Create_SDNQ_from_klein_9b_transformer_model.ipynb

Browse files

Files changed (1) hide show

colab_notebooks/Create_SDNQ_from_klein_9b_transformer_model.ipynb +1100 -0

colab_notebooks/Create_SDNQ_from_klein_9b_transformer_model.ipynb ADDED Viewed

	@@ -0,0 +1,1100 @@

+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# create SDNQ"
+      ],
+      "metadata": {
+        "id": "9vXxafECsKC_"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@markdown # CELL 1 — Setup environment + login\n",
+        "\n",
+        "from google.colab import drive, userdata\n",
+        "from huggingface_hub import login\n",
+        "import torch, os, gc\n",
+        "\n",
+        "drive.mount(\"/content/drive\")\n",
+        "\n",
+        "# Fail fast when the Colab secret is missing; otherwise authenticate.\n",
+        "hf_token = userdata.get(\"HF_TOKEN\")\n",
+        "if not hf_token:\n",
+        "    raise ValueError(\"HF_TOKEN not found in Google Colab secrets\")\n",
+        "\n",
+        "login(token=hf_token)\n",
+        "print(\"✅ Logged into Hugging Face\")"
+      ],
+      "metadata": {
+        "id": "XzgpnXJhsHg-"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@markdown # CELL 2 — Install dependencies\n",
+        "\n",
+        "!pip install -q safetensors huggingface_hub diffusers transformers accelerate\n",
+        "\n",
+        "print(\"✅ Dependencies installed\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "XXld_BQ1sU0v",
+        "outputId": "38508641-2a6a-4a19-f585-d2ac2d2faef4"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "✅ Dependencies installed\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "from transformers import BitsAndBytesConfig\n",
+        "from diffusers import DiffusionPipeline\n",
+        "\n",
+        "# ============================================================\n",
+        "# BNB CONFIG\n",
+        "# ============================================================\n",
+        "\n",
+        "bnb_config = BitsAndBytesConfig(\n",
+        "    load_in_4bit=True,\n",
+        "    bnb_4bit_quant_type=\"nf4\",\n",
+        "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
+        "    bnb_4bit_use_double_quant=True,\n",
+        ")\n",
+        "\n",
+        "# ============================================================\n",
+        "# LOAD PIPELINE\n",
+        "# ============================================================\n",
+        "\n",
+        "pipe = DiffusionPipeline.from_pretrained(\n",
+        "    \"black-forest-labs/FLUX.2-klein-9B\",\n",
+        "    # Removed: quantization_config=bnb_config,\n",
+        "    torch_dtype=torch.bfloat16,\n",
+        "\n",
+        "    # Pass individual quantization parameters directly\n",
+        "    load_in_4bit=bnb_config.load_in_4bit,\n",
+        "    bnb_4bit_quant_type=bnb_config.bnb_4bit_quant_type,\n",
+        "    bnb_4bit_compute_dtype=bnb_config.bnb_4bit_compute_dtype,\n",
+        "    bnb_4bit_use_double_quant=bnb_config.bnb_4bit_use_double_quant,\n",
+        "\n",
+        "    # critical:\n",
+        "    device_map=\"cpu\", # Changed 'auto' to 'cpu'\n",
+        "\n",
+        "    # enables CPU offload folder\n",
+        "    offload_folder=\"/content/offload\",\n",
+        "\n",
+        "    # low RAM loading\n",
+        "    low_cpu_mem_usage=True,\n",
+        ")\n",
+        "\n",
+        "# ============================================================\n",
+        "# ENABLE FULL CPU OFFLOAD\n",
+        "# ============================================================\n",
+        "\n",
+        "pipe.enable_model_cpu_offload()\n",
+        "\n",
+        "print(\"✅ Loaded with bitsandbytes + CPU offload\")"
+      ],
+      "metadata": {
+        "id": "i9MbakNlsckw"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import gc\n",
+        "import torch\n",
+        "\n",
+        "# Run a garbage-collection pass, then ask torch to empty its CUDA cache.\n",
+        "gc.collect()\n",
+        "torch.cuda.empty_cache()"
+      ],
+      "metadata": {
+        "id": "rQ5cr5ZsUQqG"
+      },
+      "execution_count": 8,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install bitsandbytes\n",
+        "import torch\n",
+        "import gc\n",
+        "import bitsandbytes as bnb\n",
+        "\n",
+        "from torch import nn\n",
+        "\n",
+        "# ============================================================\n",
+        "# DEQUANTIZE BNB MODEL\n",
+        "# ============================================================\n",
+        "\n",
+        "def dequantize_bnb_linear(layer):\n",
+        "    \"\"\"\n",
+        "    Convert Linear4bit -> torch.nn.Linear\n",
+        "\n",
+        "    Args:\n",
+        "        layer: a bnb.nn.Linear4bit module.\n",
+        "\n",
+        "    Returns:\n",
+        "        A torch.nn.Linear on CPU with the same in/out features, holding the\n",
+        "        unpacked weight and, when the source layer has one, a copy of its bias.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    packed_weight = layer.weight\n",
+        "    quant_state = getattr(packed_weight, \"quant_state\", None)\n",
+        "\n",
+        "    if quant_state is None:\n",
+        "        # No quantization state attached: presumably the weight was never packed,\n",
+        "        # so its data is used unchanged.\n",
+        "        weight = packed_weight.data\n",
+        "    else:\n",
+        "        # Tensor.dequantize() ignores the bitsandbytes quant_state; the packed\n",
+        "        # 4-bit data has to be unpacked through the bitsandbytes functional API.\n",
+        "        weight = bnb.functional.dequantize_4bit(packed_weight.data, quant_state)\n",
+        "\n",
+        "    weight = weight.detach().cpu()\n",
+        "\n",
+        "    new_layer = nn.Linear(\n",
+        "        layer.in_features,\n",
+        "        layer.out_features,\n",
+        "        bias=layer.bias is not None,\n",
+        "        dtype=weight.dtype,\n",
+        "        device=\"cpu\",\n",
+        "    )\n",
+        "\n",
+        "    new_layer.weight.data.copy_(weight)\n",
+        "\n",
+        "    if layer.bias is not None:\n",
+        "        new_layer.bias.data.copy_(layer.bias.data.cpu())\n",
+        "\n",
+        "    return new_layer\n",
+        "\n",
+        "\n",
+        "def recursively_dequantize(module):\n",
+        "    \"\"\"\n",
+        "    Walk the module tree and swap every BNB Linear4bit child for the\n",
+        "    layer returned by dequantize_bnb_linear.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    # Snapshot the children first: the loop reassigns attributes on `module`.\n",
+        "    children = list(module.named_children())\n",
+        "\n",
+        "    for child_name, submodule in children:\n",
+        "\n",
+        "        if not isinstance(submodule, bnb.nn.Linear4bit):\n",
+        "            # Not a 4-bit linear layer: descend into it and keep looking.\n",
+        "            recursively_dequantize(submodule)\n",
+        "            continue\n",
+        "\n",
+        "        print(f\"Dequantizing: {child_name}\")\n",
+        "        setattr(module, child_name, dequantize_bnb_linear(submodule))\n",
+        "\n",
+        "\n",
+        "# ============================================================\n",
+        "# RUN DEQUANTIZATION\n",
+        "# ============================================================\n",
+        "\n",
+        "pipe.to(\"cpu\")\n",
+        "\n",
+        "gc.collect()\n",
+        "torch.cuda.empty_cache()\n",
+        "\n",
+        "# Dequantize each model-bearing sub-module of the pipeline in turn\n",
+        "for component_name in (\"transformer\", \"text_encoder\", \"vae\"):\n",
+        "    print(f\"Dequantizing pipe.{component_name}...\")\n",
+        "    recursively_dequantize(getattr(pipe, component_name))\n",
+        "\n",
+        "gc.collect()\n",
+        "torch.cuda.empty_cache()\n",
+        "\n",
+        "print(\"✅ Fully dequantized back to FP weights\")"
+      ],
+      "metadata": {
+        "id": "GbZh_p6zSeTy"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Upgrade the Hugging Face stack and install sdnq.\n",
+        "# The commented-out lines are an optional clean reinstall of diffusers from source.\n",
+        "#print(\"🧹 Removing old diffusers...\")\n",
+        "#!pip uninstall -y diffusers > /dev/null 2>&1\n",
+        "#!rm -rf /usr/local/lib/python3.12/dist-packages/diffusers* ~/.cache/pip/*diffusers*\n",
+        "\n",
+        "!pip install -q --upgrade huggingface_hub transformers accelerate diffusers\n",
+        "!pip install -q sdnq\n",
+        "\n",
+        "#print(\"🔄 Installing latest diffusers...\")\n",
+        "#!pip install -q git+https://github.com/huggingface/diffusers.git --force-reinstall --no-deps\n",
+        "#!python -m pip cache purge\n",
+        "\n",
+        "# The old message said \"Cell 1 complete!\", which mislabels this step.\n",
+        "print(\"✅ SDNQ dependencies installed\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "aGH03dqUQcsQ",
+        "outputId": "b5d8cd52-aa9b-49f6-c43b-0f9c75fe210f"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "✅ Cell 1 complete!\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sdnq.common import accepted_weight_dtypes\n",
+        "accepted_weight_dtypes"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "EpL4Kek7AbjZ",
+        "outputId": "9c46f98d-cbd0-498f-bb94-33b6860f49c6"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'bf16',\n",
+              " 'bfloat16',\n",
+              " 'bool',\n",
+              " 'float10_e1m8fn',\n",
+              " 'float10_e1m9fnu',\n",
+              " 'float10_e2m7fn',\n",
+              " 'float10_e2m8fnu',\n",
+              " 'float10_e3m6fn',\n",
+              " 'float10_e3m7fnu',\n",
+              " 'float10_e4m5fn',\n",
+              " 'float10_e4m6fnu',\n",
+              " 'float10_e5m4fn',\n",
+              " 'float10_e5m5fnu',\n",
+              " 'float11_e1m10fnu',\n",
+              " 'float11_e1m9fn',\n",
+              " 'float11_e2m8fn',\n",
+              " 'float11_e2m9fnu',\n",
+              " 'float11_e3m7fn',\n",
+              " 'float11_e3m8fnu',\n",
+              " 'float11_e4m6fn',\n",
+              " 'float11_e4m7fnu',\n",
+              " 'float11_e5m5fn',\n",
+              " 'float11_e5m6fnu',\n",
+              " 'float12_e1m10fn',\n",
+              " 'float12_e1m11fnu',\n",
+              " 'float12_e2m10fnu',\n",
+              " 'float12_e2m9fn',\n",
+              " 'float12_e3m8fn',\n",
+              " 'float12_e3m9fnu',\n",
+              " 'float12_e4m7fn',\n",
+              " 'float12_e4m8fnu',\n",
+              " 'float12_e5m6fn',\n",
+              " 'float12_e5m7fnu',\n",
+              " 'float13_e1m11fn',\n",
+              " 'float13_e1m12fnu',\n",
+              " 'float13_e2m10fn',\n",
+              " 'float13_e2m11fnu',\n",
+              " 'float13_e3m10fnu',\n",
+              " 'float13_e3m9fn',\n",
+              " 'float13_e4m8fn',\n",
+              " 'float13_e4m9fnu',\n",
+              " 'float13_e5m7fn',\n",
+              " 'float13_e5m8fnu',\n",
+              " 'float14_e1m12fn',\n",
+              " 'float14_e1m13fnu',\n",
+              " 'float14_e2m11fn',\n",
+              " 'float14_e2m12fnu',\n",
+              " 'float14_e3m10fn',\n",
+              " 'float14_e3m11fnu',\n",
+              " 'float14_e4m10fnu',\n",
+              " 'float14_e4m9fn',\n",
+              " 'float14_e5m8fn',\n",
+              " 'float14_e5m9fnu',\n",
+              " 'float15_e1m13fn',\n",
+              " 'float15_e1m14fnu',\n",
+              " 'float15_e2m12fn',\n",
+              " 'float15_e2m13fnu',\n",
+              " 'float15_e3m11fn',\n",
+              " 'float15_e3m12fnu',\n",
+              " 'float15_e4m10fn',\n",
+              " 'float15_e4m11fnu',\n",
+              " 'float15_e5m10fnu',\n",
+              " 'float15_e5m9fn',\n",
+              " 'float16',\n",
+              " 'float16_e1m14fn',\n",
+              " 'float16_e1m15fnu',\n",
+              " 'float16_e2m13fn',\n",
+              " 'float16_e2m14fnu',\n",
+              " 'float16_e3m12fn',\n",
+              " 'float16_e3m13fnu',\n",
+              " 'float16_e4m11fn',\n",
+              " 'float16_e4m12fnu',\n",
+              " 'float16_e5m10fn',\n",
+              " 'float16_e5m11fnu',\n",
+              " 'float1_e1m0fnu',\n",
+              " 'float2_e1m0fn',\n",
+              " 'float2_e1m1fnu',\n",
+              " 'float2_e2m0fnu',\n",
+              " 'float32',\n",
+              " 'float3_e1m1fn',\n",
+              " 'float3_e1m2fnu',\n",
+              " 'float3_e2m0fn',\n",
+              " 'float3_e2m1fnu',\n",
+              " 'float3_e3m0fnu',\n",
+              " 'float4_e1m2fn',\n",
+              " 'float4_e1m3fnu',\n",
+              " 'float4_e2m1fn',\n",
+              " 'float4_e2m2fnu',\n",
+              " 'float4_e3m0fn',\n",
+              " 'float4_e3m1fnu',\n",
+              " 'float4_e4m0fnu',\n",
+              " 'float5_e1m3fn',\n",
+              " 'float5_e1m4fnu',\n",
+              " 'float5_e2m2fn',\n",
+              " 'float5_e2m3fnu',\n",
+              " 'float5_e3m1fn',\n",
+              " 'float5_e3m2fnu',\n",
+              " 'float5_e4m0fn',\n",
+              " 'float5_e4m1fnu',\n",
+              " 'float5_e5m0fnu',\n",
+              " 'float6_e1m4fn',\n",
+              " 'float6_e1m5fnu',\n",
+              " 'float6_e2m3fn',\n",
+              " 'float6_e2m4fnu',\n",
+              " 'float6_e3m2fn',\n",
+              " 'float6_e3m3fnu',\n",
+              " 'float6_e4m1fn',\n",
+              " 'float6_e4m2fnu',\n",
+              " 'float6_e5m0fn',\n",
+              " 'float6_e5m1fnu',\n",
+              " 'float7_e1m5fn',\n",
+              " 'float7_e1m6fnu',\n",
+              " 'float7_e2m4fn',\n",
+              " 'float7_e2m5fnu',\n",
+              " 'float7_e3m3fn',\n",
+              " 'float7_e3m4fnu',\n",
+              " 'float7_e4m2fn',\n",
+              " 'float7_e4m3fnu',\n",
+              " 'float7_e5m1fn',\n",
+              " 'float7_e5m2fnu',\n",
+              " 'float8_e1m6fn',\n",
+              " 'float8_e1m7fnu',\n",
+              " 'float8_e2m5fn',\n",
+              " 'float8_e2m6fnu',\n",
+              " 'float8_e3m4fn',\n",
+              " 'float8_e3m5fnu',\n",
+              " 'float8_e4m3fn',\n",
+              " 'float8_e4m3fn_sdnq',\n",
+              " 'float8_e4m3fnuz',\n",
+              " 'float8_e4m4fnu',\n",
+              " 'float8_e5m2',\n",
+              " 'float8_e5m2fn',\n",
+              " 'float8_e5m2fnuz',\n",
+              " 'float8_e5m3fnu',\n",
+              " 'float8_e8m0fnu',\n",
+              " 'float9_e1m7fn',\n",
+              " 'float9_e1m8fnu',\n",
+              " 'float9_e2m6fn',\n",
+              " 'float9_e2m7fnu',\n",
+              " 'float9_e3m5fn',\n",
+              " 'float9_e3m6fnu',\n",
+              " 'float9_e4m4fn',\n",
+              " 'float9_e4m5fnu',\n",
+              " 'float9_e5m3fn',\n",
+              " 'float9_e5m4fnu',\n",
+              " 'fp1',\n",
+              " 'fp10',\n",
+              " 'fp11',\n",
+              " 'fp12',\n",
+              " 'fp13',\n",
+              " 'fp14',\n",
+              " 'fp15',\n",
+              " 'fp16',\n",
+              " 'fp2',\n",
+              " 'fp3',\n",
+              " 'fp32',\n",
+              " 'fp4',\n",
+              " 'fp5',\n",
+              " 'fp6',\n",
+              " 'fp7',\n",
+              " 'fp8',\n",
+              " 'fp9',\n",
+              " 'int1',\n",
+              " 'int10',\n",
+              " 'int11',\n",
+              " 'int12',\n",
+              " 'int13',\n",
+              " 'int14',\n",
+              " 'int15',\n",
+              " 'int16',\n",
+              " 'int2',\n",
+              " 'int3',\n",
+              " 'int32',\n",
+              " 'int4',\n",
+              " 'int5',\n",
+              " 'int6',\n",
+              " 'int7',\n",
+              " 'int8',\n",
+              " 'int9',\n",
+              " 'ufp1',\n",
+              " 'ufp10',\n",
+              " 'ufp11',\n",
+              " 'ufp12',\n",
+              " 'ufp13',\n",
+              " 'ufp14',\n",
+              " 'ufp15',\n",
+              " 'ufp16',\n",
+              " 'ufp2',\n",
+              " 'ufp3',\n",
+              " 'ufp4',\n",
+              " 'ufp5',\n",
+              " 'ufp6',\n",
+              " 'ufp7',\n",
+              " 'ufp8',\n",
+              " 'ufp9',\n",
+              " 'uint1',\n",
+              " 'uint10',\n",
+              " 'uint11',\n",
+              " 'uint12',\n",
+              " 'uint13',\n",
+              " 'uint14',\n",
+              " 'uint15',\n",
+              " 'uint16',\n",
+              " 'uint2',\n",
+              " 'uint3',\n",
+              " 'uint32',\n",
+              " 'uint4',\n",
+              " 'uint5',\n",
+              " 'uint6',\n",
+              " 'uint7',\n",
+              " 'uint8',\n",
+              " 'uint9'}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 7
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "pipe"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "iJaa_thrWa22",
+        "outputId": "f8109023-9b9e-4b84-ba45-c518a1b36579"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Flux2KleinPipeline {\n",
+              "  \"_class_name\": \"Flux2KleinPipeline\",\n",
+              "  \"_diffusers_version\": \"0.38.0\",\n",
+              "  \"_name_or_path\": \"black-forest-labs/FLUX.2-klein-9B\",\n",
+              "  \"is_distilled\": true,\n",
+              "  \"scheduler\": [\n",
+              "    \"diffusers\",\n",
+              "    \"FlowMatchEulerDiscreteScheduler\"\n",
+              "  ],\n",
+              "  \"text_encoder\": [\n",
+              "    \"transformers\",\n",
+              "    \"Qwen3ForCausalLM\"\n",
+              "  ],\n",
+              "  \"tokenizer\": [\n",
+              "    \"transformers\",\n",
+              "    \"Qwen2Tokenizer\"\n",
+              "  ],\n",
+              "  \"transformer\": [\n",
+              "    \"diffusers\",\n",
+              "    \"Flux2Transformer2DModel\"\n",
+              "  ],\n",
+              "  \"vae\": [\n",
+              "    \"diffusers\",\n",
+              "    \"AutoencoderKLFlux2\"\n",
+              "  ]\n",
+              "}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 18
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch, gc\n",
+        "from sdnq import SDNQConfig , sdnq_post_load_quant\n",
+        "\n",
+        "# Run SDNQ post-load quantization on the transformer and keep the returned module\n",
+        "pipe.transformer = sdnq_post_load_quant(\n",
+        "    pipe.transformer,\n",
+        "    weights_dtype=\"uint4\",\n",
+        "    group_size=0,\n",
+        "    use_svd=True,\n",
+        "    use_dynamic_quantization=True,\n",
+        "    dynamic_loss_threshold=1e-2,\n",
+        "    quant_conv=False,\n",
+        "    quant_embedding=False,\n",
+        "    quantization_device=\"cuda\",\n",
+        "    return_device=\"cpu\",\n",
+        ")\n",
+        "\n",
+        "# Save the result as safetensors with max_shard_size='2GB'\n",
+        "transformer_save_path = \"/content/transformer\"\n",
+        "with torch.no_grad():\n",
+        "    pipe.transformer.save_pretrained(\n",
+        "        transformer_save_path,\n",
+        "        safe_serialization=True,\n",
+        "        max_shard_size='2GB',\n",
+        "    )\n",
+        "    print(f\"✅ pipe.transformer saved to: {transformer_save_path}\")"
+      ],
+      "metadata": {
+        "id": "hWAkx7u3XfdY"
+      },
+      "execution_count": 12,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch, gc\n",
+        "from sdnq import SDNQConfig , sdnq_post_load_quant\n",
+        "\n",
+        "# Apply SDNQ to text encoder\n",
+        "pipe.text_encoder = sdnq_post_load_quant(\n",
+        "    pipe.text_encoder,\n",
+        "    weights_dtype=\"uint2\",\n",
+        "    group_size=0,\n",
+        "    use_svd=True,\n",
+        "    use_dynamic_quantization=True,\n",
+        "    dynamic_loss_threshold=1e-2,\n",
+        "    quant_conv=False,\n",
+        "    quant_embedding=False,\n",
+        "    quantization_device=\"cuda\",\n",
+        "    return_device=\"cpu\",\n",
+        ")\n",
+        "\n",
+        "# Save the result as safetensors with max_shard_size='2GB'\n",
+        "text_encoder_save_path = \"/content/text_encoder\"\n",
+        "with torch.no_grad():\n",
+        "    pipe.text_encoder.save_pretrained(\n",
+        "        text_encoder_save_path,\n",
+        "        safe_serialization=True,\n",
+        "        max_shard_size='2GB',\n",
+        "    )\n",
+        "    print(f\"✅ pipe.text_encoder saved to: {text_encoder_save_path}\")"
+      ],
+      "metadata": {
+        "id": "uSPLPAVwgXp_"
+      },
+      "execution_count": 10,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import gc\n",
+        "import torch\n",
+        "\n",
+        "# Run a garbage-collection pass, then ask torch to empty its CUDA cache.\n",
+        "gc.collect()\n",
+        "torch.cuda.empty_cache()"
+      ],
+      "metadata": {
+        "id": "Xvnz0s2AV0VW"
+      },
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch, gc\n",
+        "from sdnq import SDNQConfig , sdnq_post_load_quant\n",
+        "\n",
+        "import torch\n",
+        "# Save the VAE as safetensors with max_shard_size='2GB'\n",
+        "vae_save_path = \"/content/vae\"\n",
+        "with torch.no_grad():\n",
+        "    pipe.vae.save_pretrained(\n",
+        "        vae_save_path,\n",
+        "        safe_serialization=True,\n",
+        "        max_shard_size='2GB',\n",
+        "    )\n",
+        "    print(f\"✅ pipe.vae saved to: {vae_save_path}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "4PBgpLyDlwn4",
+        "outputId": "b9419cc2-5b2d-4909-980d-c424b00e86e0"
+      },
+      "execution_count": 11,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "✅ pipe.vae saved to: /content/vae\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "# Save the scheduler to its own folder\n",
+        "scheduler_save_path = \"/content/scheduler\"\n",
+        "with torch.no_grad():\n",
+        "    pipe.scheduler.save_pretrained(\n",
+        "        scheduler_save_path,\n",
+        "        safe_serialization=True,\n",
+        "        max_shard_size='2GB',\n",
+        "    )\n",
+        "    print(f\"✅ pipe.scheduler saved to: {scheduler_save_path}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "nxJZeqwlmgcR",
+        "outputId": "e4155232-bd12-48fc-b80d-db1358c6f1fb"
+      },
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "✅ pipe.scheduler saved to: /content/scheduler\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "# Save the tokenizer to its own folder\n",
+        "tokenizer_save_path = \"/content/tokenizer\"\n",
+        "with torch.no_grad():\n",
+        "    pipe.tokenizer.save_pretrained(\n",
+        "        tokenizer_save_path,\n",
+        "        safe_serialization=True,\n",
+        "        max_shard_size='2GB',\n",
+        "    )\n",
+        "    print(f\"✅ pipe.tokenizer saved to: {tokenizer_save_path}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ZGOf4WtqFH6g",
+        "outputId": "cac3ffff-efd6-4255-c7e5-86ae75921a00"
+      },
+      "execution_count": 13,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "✅ pipe.tokenizer saved to: /content/tokenizer\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "fbf8ead6"
+      },
+      "source": [
+        "import os\n",
+        "from huggingface_hub import HfApi, login, create_repo\n",
+        "from google.colab import userdata\n",
+        "from huggingface_hub.utils import HfHubHTTPError\n",
+        "\n",
+        "# Retrieve hf_token from Colab secrets\n",
+        "hf_token = userdata.get(\"HF_TOKEN\")\n",
+        "if not hf_token:\n",
+        "    raise ValueError(\"HF_TOKEN not found in Google Colab secrets. Please ensure it is set.\")\n",
+        "\n",
+        "# Login to Hugging Face Hub\n",
+        "print(\"\\nLogging into Hugging Face...\")\n",
+        "login(token=hf_token)\n",
+        "\n",
+        "api = HfApi()\n",
+        "\n",
+        "# Define the target repository ID as specified by the user\n",
+        "repo_id = \"codeShare/FLUX.2-klein-9b-SDNQ-2bit\"\n",
+        "\n",
+        "# Check if repository exists, if not, create it\n",
+        "print(f\"Checking if repository {repo_id} exists...\")\n",
+        "try:\n",
+        "    if not api.repo_exists(repo_id=repo_id, repo_type=\"model\"):\n",
+        "        print(f\"Repository {repo_id} not found. Creating it...\")\n",
+        "        create_repo(repo_id=repo_id, repo_type=\"model\", private=False, token=hf_token)\n",
+        "        print(f\"Repository {repo_id} created successfully.\")\n",
+        "    else:\n",
+        "        print(f\"Repository {repo_id} already exists.\")\n",
+        "except HfHubHTTPError as e:\n",
+        "    print(f\"An error occurred while checking or creating the repository: {e}\")\n",
+        "    raise # Re-raise other HTTP errors\n",
+        "\n",
+        "# Define paths for the folders to upload\n",
+        "folders_to_upload = [\"/content/vae\", \"/content/tokenizer\", \"/content/scheduler\"]\n",
+        "\n",
+        "# Define the source path for model.safetensors.index.json and its target name in the repo\n",
+        "# NOTE(review): this file is saved under the text_encoder folder, yet it is published as the\n",
+        "# repo-level model_index.json. Confirm the rename is intended; the behavior is unchanged here.\n",
+        "model_index_source_path = \"/content/text_encoder/model.safetensors.index.json\"\n",
+        "model_index_target_filename = \"model_index.json\"\n",
+        "\n",
+        "print(f\"\\nUploading specified components to {repo_id}...\")\n",
+        "\n",
+        "# Upload each specified folder into a sub-folder of the same name.\n",
+        "# Without path_in_repo every folder's files go to the repo root, where files\n",
+        "# from different components can overwrite each other.\n",
+        "for folder_path in folders_to_upload:\n",
+        "    if os.path.isdir(folder_path):\n",
+        "        component_name = os.path.basename(os.path.normpath(folder_path))\n",
+        "        print(f\"Uploading folder: {folder_path}...\")\n",
+        "        api.upload_folder(\n",
+        "            folder_path=folder_path,\n",
+        "            path_in_repo=component_name,\n",
+        "            repo_id=repo_id,\n",
+        "            repo_type=\"model\",\n",
+        "            commit_message=f\"Upload {component_name} component\",\n",
+        "        )\n",
+        "        print(f\"✅ Folder {folder_path} uploaded.\")\n",
+        "    else:\n",
+        "        print(f\"⚠️ Folder not found, skipping: {folder_path}\")\n",
+        "\n",
+        "# Upload model.safetensors.index.json as model_index.json\n",
+        "if os.path.exists(model_index_source_path):\n",
+        "    print(f\"Uploading file: {model_index_source_path} as {model_index_target_filename}...\")\n",
+        "    api.upload_file(\n",
+        "        path_or_fileobj=model_index_source_path,\n",
+        "        path_in_repo=model_index_target_filename,\n",
+        "        repo_id=repo_id,\n",
+        "        repo_type=\"model\",\n",
+        "        commit_message=f\"Upload {model_index_target_filename}\",\n",
+        "    )\n",
+        "    print(f\"✅ File {model_index_source_path} uploaded as {model_index_target_filename}.\")\n",
+        "else:\n",
+        "    print(f\"⚠️ {model_index_source_path} not found, skipping upload of {model_index_target_filename}.\")\n",
+        "\n",
+        "print(\"\\n✅ All specified components processed for upload to Hugging Face Hub.\")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "533c64b4"
+      },
+      "source": [
+        "import torch\n",
+        "import gc\n",
+        "import os\n",
+        "import shutil\n",
+        "from huggingface_hub import login\n",
+        "from diffusers import Flux2KleinPipeline, Flux2Transformer2DModel, AutoencoderKLFlux2\n",
+        "from transformers import Qwen2Tokenizer, Qwen3ForCausalLM\n",
+        "from sdnq import SDNQConfig, sdnq_post_load_quant # Ensure sdnq is imported\n",
+        "\n",
+        "# Define paths for local saved components\n",
+        "text_encoder_local_path = \"/content/text_encoder\"\n",
+        "transformer_local_path = \"/content/transformer\"\n",
+        "vae_local_path = \"/content/vae\"\n",
+        "\n",
+        "# Define the temporary directory to assemble the pipeline\n",
+        "rebuilt_pipeline_dir = \"/content/rebuilt_pipeline_temp\"\n",
+        "\n",
+        "print(f\"🔄 Rebuilding pipeline components in {rebuilt_pipeline_dir}...\")\n",
+        "\n",
+        "# Clean up and create the temporary directory\n",
+        "shutil.rmtree(rebuilt_pipeline_dir, ignore_errors=True)\n",
+        "os.makedirs(rebuilt_pipeline_dir, exist_ok=True)\n",
+        "\n",
+        "# Step 1: Load and save tokenizer and scheduler from the original model\n",
+        "# These components were not SDNQ'd or individually saved as models, so we fetch them from the source.\n",
+        "print(\"Loading original pipeline briefly to extract tokenizer and scheduler...\")\n",
+        "original_pipe_for_components = Flux2KleinPipeline.from_pretrained(\n",
+        "    \"black-forest-labs/FLUX.2-klein-9B\",\n",
+        "    torch_dtype=torch.bfloat16, # Consistent with initial load\n",
+        "    low_cpu_mem_usage=True,\n",
+        "    device_map=\"cpu\",\n",
+        ")\n",
+        "\n",
+        "original_pipe_for_components.tokenizer.save_pretrained(os.path.join(rebuilt_pipeline_dir, \"tokenizer\"))\n",
+        "original_pipe_for_components.scheduler.save_pretrained(os.path.join(rebuilt_pipeline_dir, \"scheduler\"))\n",
+        "\n",
+        "del original_pipe_for_components\n",
+        "gc.collect()\n",
+        "torch.cuda.empty_cache()\n",
+        "\n",
+        "\n",
+        "# Step 2: Copy the locally SDNQ-saved model components into the temporary pipeline directory\n",
+        "print(f\"Copying SDNQ-applied text_encoder, transformer, vae to {rebuilt_pipeline_dir}...\")\n",
+        "\n",
+        "# Pipeline sub-folder name -> local directory that holds the saved component.\n",
+        "component_sources = {\n",
+        "    \"text_encoder\": text_encoder_local_path,\n",
+        "    \"transformer\": transformer_local_path,\n",
+        "    \"vae\": vae_local_path,\n",
+        "}\n",
+        "\n",
+        "# Report a missing component folder up front instead of after a partial copy.\n",
+        "missing_sources = [path for path in component_sources.values() if not os.path.isdir(path)]\n",
+        "if missing_sources:\n",
+        "    raise FileNotFoundError(f\"Missing SDNQ component folder(s): {missing_sources}\")\n",
+        "\n",
+        "for component_name, component_src in component_sources.items():\n",
+        "    component_dst = os.path.join(rebuilt_pipeline_dir, component_name)\n",
+        "    # copytree(..., dirs_exist_ok=True) creates the destination when needed and merges\n",
+        "    # files (via shutil.copy2) and sub-directories into it, replacing the three\n",
+        "    # copy-pasted listdir loops with one call per component.\n",
+        "    shutil.copytree(component_src, component_dst, dirs_exist_ok=True)\n",
+        "\n",
+        "# Step 3: Load the full pipeline from the temporary directory\n",
+        "print(f\"Loading full pipeline from {rebuilt_pipeline_dir}...\")\n",
+        "# Loader options gathered in one place so they are easy to compare with the initial load.\n",
+        "pipeline_load_kwargs = {\n",
+        "    \"torch_dtype\": torch.bfloat16,  # consistent with the initial load\n",
+        "    \"low_cpu_mem_usage\": True,\n",
+        "    \"device_map\": \"cpu\",  # load onto CPU (noted by the author as important for offloading)\n",
+        "}\n",
+        "new_pipe = Flux2KleinPipeline.from_pretrained(rebuilt_pipeline_dir, **pipeline_load_kwargs)\n",
+        "print(\"✅ Base pipeline rebuilt from local components.\")\n",
+        "\n",
+        "# Step 4: Re-apply SDNQ to the loaded components\n",
+        "# When loaded via from_pretrained, the base models are loaded, not the SDNQ wrappers.\n",
+        "print(\"🔥 Re-applying SDNQ optimizations to text_encoder, transformer, and vae...\")\n",
+        "sdnq_params = {\n",
+        "    \"use_dynamic_quantization\": True,\n",
+        "    \"weights_dtype\": \"uint4\",\n",
+        "    \"dynamic_loss_threshold\": 1e-2,\n",
+        "    \"use_svd\": True,  # kept True for consistency with the original SDNQ application\n",
+        "    \"group_size\": 0,\n",
+        "    \"quantization_device\": \"cuda\",  # kept on cuda for consistency with the original SDNQ application\n",
+        "    \"return_device\": \"cpu\",\n",
+        "    \"quant_conv\": False,\n",
+        "    \"quant_embedding\": False,\n",
+        "}\n",
+        "\n",
+        "# Quantize in the same order as before: transformer, then text_encoder, then vae.\n",
+        "for component_attr in (\"transformer\", \"text_encoder\", \"vae\"):\n",
+        "    quantized_component = sdnq_post_load_quant(getattr(new_pipe, component_attr), **sdnq_params)\n",
+        "    setattr(new_pipe, component_attr, quantized_component)\n",
+        "print(\"✅ SDNQ re-applied to all necessary components.\")\n",
+        "\n",
+        "# Collect unreferenced Python objects first, then release PyTorch's unused cached GPU memory.\n",
+        "gc.collect()\n",
+        "torch.cuda.empty_cache()\n",
+        "\n",
+        "# Step 5: Login to Hugging Face and push the new pipeline\n",
+        "print(\"\\nLogging into Hugging Face...\")\n",
+        "# Validate the secret before logging in, the same way the notebook's upload cells do:\n",
+        "# an empty token would otherwise reach login() and fail there with a less helpful error.\n",
+        "hf_token = userdata.get(\"HF_TOKEN\")\n",
+        "if not hf_token:\n",
+        "    raise ValueError(\"HF_TOKEN not found in Google Colab secrets. Please ensure it is set.\")\n",
+        "login(token=hf_token)\n",
+        "\n",
+        "print(\"\\nPushing rebuilt and SDNQ-applied pipeline to Hugging Face Hub...\")\n",
+        "# NOTE(review): the repo name says 2bit while sdnq_params in this cell sets weights_dtype to uint4 - confirm the target repo.\n",
+        "new_pipe.push_to_hub(\n",
+        "    repo_id=\"codeShare/FLUX.2-klein-9b-SDNQ-2bit\", # Using the 2bit repo_id as per request\n",
+        "    safe_serialization=True,\n",
+        "    commit_message=\"Rebuilt and pushed SDNQ Flux2 Klein 9b (2bit) after local save/crash\"\n",
+        ")\n",
+        "\n",
+        "print(\"\\n✅ Rebuilt pipeline pushed to Hugging Face Hub.\")\n",
+        "\n",
+        "# Update the global pipe object to the newly built one\n",
+        "pipe = new_pipe"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "3bdfe2e2"
+      },
+      "source": [
+        "import json\n",
+        "import os\n",
+        "from huggingface_hub import HfApi, login, create_repo\n",
+        "from google.colab import userdata\n",
+        "from huggingface_hub.utils import HfHubHTTPError\n",
+        "\n",
+        "# Retrieve hf_token from Colab secrets\n",
+        "hf_token = userdata.get(\"HF_TOKEN\")\n",
+        "if not hf_token:\n",
+        "    raise ValueError(\"HF_TOKEN not found in Google Colab secrets. Please ensure it is set.\")\n",
+        "\n",
+        "# Login to Hugging Face Hub\n",
+        "print(\"\\nLogging into Hugging Face...\")\n",
+        "login(token=hf_token)\n",
+        "\n",
+        "api = HfApi()\n",
+        "\n",
+        "# Define the target repository ID\n",
+        "repo_id = \"codeShare/FLUX.2-klein-9b-SDNQ-2bit\" # Or \"codeShare/FLUX.2-klein-9b-SDNQ-4bit\" if that was the intended repo\n",
+        "\n",
+        "# Check if repository exists, if not, create it\n",
+        "print(f\"Checking if repository {repo_id} exists...\")\n",
+        "try:\n",
+        "    if not api.repo_exists(repo_id=repo_id, repo_type=\"model\"):\n",
+        "        print(f\"Repository {repo_id} not found. Creating it...\")\n",
+        "        create_repo(repo_id=repo_id, repo_type=\"model\", private=False, token=hf_token)\n",
+        "        print(f\"Repository {repo_id} created successfully.\")\n",
+        "    else:\n",
+        "        print(f\"Repository {repo_id} already exists.\")\n",
+        "except HfHubHTTPError as e:\n",
+        "    print(f\"An error occurred while checking or creating the repository: {e}\")\n",
+        "    raise  # Every HTTP error is re-raised: nothing below can succeed without the repo\n",
+        "\n",
+        "# Define local paths for the folders to upload\n",
+        "local_folders_to_upload = [\"/content/vae\", \"/content/tokenizer\", \"/content/scheduler\"]\n",
+        "\n",
+        "# The repo-root model_index.json must be the diffusers *pipeline* index (a JSON object with a\n",
+        "# '_class_name' key). The previous source, text_encoder/model.safetensors.index.json, is the\n",
+        "# text encoder's weight-shard map; publishing it under this name would replace the pipeline\n",
+        "# index with an unrelated file.\n",
+        "# NOTE(review): assumes the pipeline index sits next to the component folders - confirm the path.\n",
+        "model_index_source_path = \"/content/model_index.json\"\n",
+        "model_index_target_filename = \"model_index.json\"\n",
+        "\n",
+        "print(f\"\\nUploading specified components to {repo_id}...\")\n",
+        "\n",
+        "# Upload each specified folder to a corresponding path in the repo\n",
+        "for local_folder_path in local_folders_to_upload:\n",
+        "    folder_name = os.path.basename(local_folder_path)\n",
+        "    if os.path.isdir(local_folder_path):\n",
+        "        print(f\"Uploading folder: {local_folder_path} to repo path: {folder_name}...\")\n",
+        "        api.upload_folder(\n",
+        "            folder_path=local_folder_path,\n",
+        "            repo_id=repo_id,\n",
+        "            repo_type=\"model\",\n",
+        "            path_in_repo=folder_name, # Uploads contents of local_folder_path into a folder named folder_name in the repo\n",
+        "            commit_message=f\"Upload {folder_name} component\",\n",
+        "        )\n",
+        "        print(f\"✅ Folder {local_folder_path} uploaded to {repo_id}/{folder_name}.\")\n",
+        "    else:\n",
+        "        print(f\"⚠️ Local folder not found, skipping: {local_folder_path}\")\n",
+        "\n",
+        "# Upload the pipeline index to the repo root, but only if the file really is one\n",
+        "if not os.path.exists(model_index_source_path):\n",
+        "    print(f\"⚠️ {model_index_source_path} not found, skipping upload of {model_index_target_filename}.\")\n",
+        "else:\n",
+        "    try:\n",
+        "        with open(model_index_source_path, \"r\", encoding=\"utf-8\") as index_file:\n",
+        "            model_index_data = json.load(index_file)\n",
+        "    except ValueError:\n",
+        "        model_index_data = None  # not valid JSON/UTF-8: treat as not a pipeline index\n",
+        "\n",
+        "    if isinstance(model_index_data, dict) and \"_class_name\" in model_index_data:\n",
+        "        print(f\"Uploading file: {model_index_source_path} as {model_index_target_filename} to the repo root...\")\n",
+        "        api.upload_file(\n",
+        "            path_or_fileobj=model_index_source_path,\n",
+        "            path_in_repo=model_index_target_filename,\n",
+        "            repo_id=repo_id,\n",
+        "            repo_type=\"model\",\n",
+        "            commit_message=f\"Upload {model_index_target_filename}\",\n",
+        "        )\n",
+        "        print(f\"✅ File {model_index_source_path} uploaded.\")\n",
+        "    else:\n",
+        "        print(f\"⚠️ {model_index_source_path} has no '_class_name' key, so it is not a pipeline index; skipping upload of {model_index_target_filename}.\")\n",
+        "\n",
+        "print(\"\\n✅ All specified components processed for upload to Hugging Face Hub.\")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "a36d8446"
+      },
+      "source": [
+        "# NOTE(review): this cell repeats the preceding upload cell; running both uploads the same folders twice.\n",
+        "import json\n",
+        "import os\n",
+        "from huggingface_hub import HfApi, login, create_repo\n",
+        "from google.colab import userdata\n",
+        "from huggingface_hub.utils import HfHubHTTPError\n",
+        "\n",
+        "# Retrieve hf_token from Colab secrets\n",
+        "hf_token = userdata.get(\"HF_TOKEN\")\n",
+        "if not hf_token:\n",
+        "    raise ValueError(\"HF_TOKEN not found in Google Colab secrets. Please ensure it is set.\")\n",
+        "\n",
+        "# Login to Hugging Face Hub\n",
+        "print(\"\\nLogging into Hugging Face...\")\n",
+        "login(token=hf_token)\n",
+        "\n",
+        "api = HfApi()\n",
+        "\n",
+        "# Define the target repository ID\n",
+        "repo_id = \"codeShare/FLUX.2-klein-9b-SDNQ-2bit\" # Or \"codeShare/FLUX.2-klein-9b-SDNQ-4bit\" if that was the intended repo\n",
+        "\n",
+        "# Check if repository exists, if not, create it\n",
+        "print(f\"Checking if repository {repo_id} exists...\")\n",
+        "try:\n",
+        "    if not api.repo_exists(repo_id=repo_id, repo_type=\"model\"):\n",
+        "        print(f\"Repository {repo_id} not found. Creating it...\")\n",
+        "        create_repo(repo_id=repo_id, repo_type=\"model\", private=False, token=hf_token)\n",
+        "        print(f\"Repository {repo_id} created successfully.\")\n",
+        "    else:\n",
+        "        print(f\"Repository {repo_id} already exists.\")\n",
+        "except HfHubHTTPError as e:\n",
+        "    print(f\"An error occurred while checking or creating the repository: {e}\")\n",
+        "    raise  # Every HTTP error is re-raised: nothing below can succeed without the repo\n",
+        "\n",
+        "# Define local paths for the folders to upload\n",
+        "local_folders_to_upload = [\"/content/vae\", \"/content/tokenizer\", \"/content/scheduler\"]\n",
+        "\n",
+        "# The repo-root model_index.json must be the diffusers *pipeline* index (a JSON object with a\n",
+        "# '_class_name' key). The previous source, text_encoder/model.safetensors.index.json, is the\n",
+        "# text encoder's weight-shard map; publishing it under this name would replace the pipeline\n",
+        "# index with an unrelated file.\n",
+        "# NOTE(review): assumes the pipeline index sits next to the component folders - confirm the path.\n",
+        "model_index_source_path = \"/content/model_index.json\"\n",
+        "model_index_target_filename = \"model_index.json\"\n",
+        "\n",
+        "print(f\"\\nUploading specified components to {repo_id}...\")\n",
+        "\n",
+        "# Upload each specified folder to a corresponding path in the repo\n",
+        "for local_folder_path in local_folders_to_upload:\n",
+        "    folder_name = os.path.basename(local_folder_path)\n",
+        "    if os.path.isdir(local_folder_path):\n",
+        "        print(f\"Uploading folder: {local_folder_path} to repo path: {folder_name}...\")\n",
+        "        api.upload_folder(\n",
+        "            folder_path=local_folder_path,\n",
+        "            repo_id=repo_id,\n",
+        "            repo_type=\"model\",\n",
+        "            path_in_repo=folder_name, # Uploads contents of local_folder_path into a folder named folder_name in the repo\n",
+        "            commit_message=f\"Upload {folder_name} component\",\n",
+        "        )\n",
+        "        print(f\"✅ Folder {local_folder_path} uploaded to {repo_id}/{folder_name}.\")\n",
+        "    else:\n",
+        "        print(f\"⚠️ Local folder not found, skipping: {local_folder_path}\")\n",
+        "\n",
+        "# Upload the pipeline index to the repo root, but only if the file really is one\n",
+        "if not os.path.exists(model_index_source_path):\n",
+        "    print(f\"⚠️ {model_index_source_path} not found, skipping upload of {model_index_target_filename}.\")\n",
+        "else:\n",
+        "    try:\n",
+        "        with open(model_index_source_path, \"r\", encoding=\"utf-8\") as index_file:\n",
+        "            model_index_data = json.load(index_file)\n",
+        "    except ValueError:\n",
+        "        model_index_data = None  # not valid JSON/UTF-8: treat as not a pipeline index\n",
+        "\n",
+        "    if isinstance(model_index_data, dict) and \"_class_name\" in model_index_data:\n",
+        "        print(f\"Uploading file: {model_index_source_path} as {model_index_target_filename} to the repo root...\")\n",
+        "        api.upload_file(\n",
+        "            path_or_fileobj=model_index_source_path,\n",
+        "            path_in_repo=model_index_target_filename,\n",
+        "            repo_id=repo_id,\n",
+        "            repo_type=\"model\",\n",
+        "            commit_message=f\"Upload {model_index_target_filename}\",\n",
+        "        )\n",
+        "        print(f\"✅ File {model_index_source_path} uploaded as {repo_id}/{model_index_target_filename}.\")\n",
+        "    else:\n",
+        "        print(f\"⚠️ {model_index_source_path} has no '_class_name' key, so it is not a pipeline index; skipping upload of {model_index_target_filename}.\")\n",
+        "\n",
+        "print(\"\\n✅ All specified components processed for upload to Hugging Face Hub.\")"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}