Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

.ipynb_checkpoints/config-checkpoint.json +103 -0
Untitled.ipynb +326 -0
config.json +103 -0
diffusion_pytorch_model.safetensors +3 -0
scale.py +107 -0

.ipynb_checkpoints/config-checkpoint.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.36.0",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": true,
+  "in_channels": 3,
+  "latent_channels": 32,
+  "latents_mean": [
+    -0.03542253375053406,
+    0.20086465775966644,
+    -0.016413161531090736,
+    -0.0956302210688591,
+    -0.2672063112258911,
+    0.2609933018684387,
+    -0.07806991040706635,
+    -0.48407721519470215,
+    0.21844269335269928,
+    -0.1122383326292038,
+    0.27197545766830444,
+    -0.18958772718906403,
+    0.18776826560497284,
+    0.0987580344080925,
+    0.2837068736553192,
+    -0.4486690163612366,
+    0.4816776514053345,
+    0.02947971224784851,
+    -0.1337375044822693,
+    -0.39750921726226807,
+    -0.08513020724058151,
+    -0.054023586213588715,
+    -0.3943594992160797,
+    0.23918119072914124,
+    -0.12466679513454437,
+    0.09935147315263748,
+    0.31858691573143005,
+    0.48585832118988037,
+    -0.6416525840759277,
+    -0.15164820849895477,
+    -0.4693508744239807,
+    -0.13071806728839874
+  ],
+  "latents_std": [
+    1.5792087316513062,
+    1.5769503116607666,
+    1.5864241123199463,
+    1.6454921960830688,
+    1.5336694717407227,
+    1.5587652921676636,
+    1.5838669538497925,
+    1.5659377574920654,
+    1.6860467195510864,
+    1.5192310810089111,
+    1.573639988899231,
+    1.5953549146652222,
+    1.5271092653274536,
+    1.6246271133422852,
+    1.7054023742675781,
+    1.607722282409668,
+    1.558642864227295,
+    1.5824549198150635,
+    1.6202995777130127,
+    1.6206320524215698,
+    1.6379750967025757,
+    1.6527063846588135,
+    1.498811960220337,
+    1.5706247091293335,
+    1.5854856967926025,
+    1.4828169345855713,
+    1.5693111419677734,
+    1.692481517791748,
+    1.6409776210784912,
+    1.6216280460357666,
+    1.6087706089019775,
+    1.5776633024215698
+  ],
+  "layers_per_block": 2,
+  "mid_block_add_attention": true,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 32,
+  "scaling_factor": 1.0,
+  "shift_factor": 0.0,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ],
+  "use_post_quant_conv": true,
+  "use_quant_conv": true
+}

Untitled.ipynb ADDED Viewed

	@@ -0,0 +1,326 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "dccce86b-90a0-47c7-aaad-2ebb16d90756",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Картинка загружена: torch.Size([1, 3, 1280, 1280])\n",
+      "\n",
+      "=======================================================\n",
+      "VAE : FLUX.2\n",
+      "repo: AiArtLab/sdxs-1b\n",
+      "latent_channels : 32\n",
+      "scaling_factor  : 1.00000\n",
+      "shift_factor    : 0.00000\n",
+      "latents_mean    : нет\n",
+      "latents_std     : нет\n",
+      "\n",
+      "[encode] raw latents: torch.Size([1, 32, 160, 160])\n",
+      "[flux2]  patchify  : torch.Size([1, 32, 160, 160]) → torch.Size([1, 128, 80, 80])\n",
+      "[flux2]  BN norm   : mean=-0.0096  std=1.7674\n",
+      "\n",
+      "[STATS] после BN нормализации (128ch):\n",
+      "  log-variance : -0.0767  (идеал ≈ 0.0)\n",
+      "  mean         : -0.0134\n",
+      "  std          : 0.9624\n",
+      "  shape        : torch.Size([1, 128, 80, 80])\n",
+      "\n",
+      "[flux2]  BN denorm + unpatchify: torch.Size([1, 32, 160, 160])\n",
+      "Сохранено: vaetest/decoded_FLUX.2.png\n",
+      "\n",
+      "=======================================================\n",
+      "VAE : vae32ch2\n",
+      "repo: vae32ch2\n",
+      "latent_channels : 32\n",
+      "scaling_factor  : 1.00000\n",
+      "shift_factor    : 0.00000\n",
+      "latents_mean    : да (32ch)\n",
+      "latents_std     : да (32ch)\n",
+      "\n",
+      "[encode] raw latents: torch.Size([1, 32, 160, 160])\n",
+      "\n",
+      "[STATS] после per-channel нормализации (32ch):\n",
+      "  log-variance : 0.1192  (идеал ≈ 0.0)\n",
+      "  mean         : -0.0016\n",
+      "  std          : 1.0614\n",
+      "  shape        : torch.Size([1, 32, 160, 160])\n",
+      "\n",
+      "[vae32ch2] denorm: torch.Size([1, 32, 160, 160])\n",
+      "Сохранено: vaetest/decoded_vae32ch2.png\n",
+      "\n",
+      "=======================================================\n",
+      "Готово\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "import torch\n",
+    "from PIL import Image\n",
+    "from diffusers import AutoencoderKL, AutoencoderKLFlux2\n",
+    "from torchvision.transforms.functional import to_pil_image\n",
+    "import matplotlib.pyplot as plt\n",
+    "import os\n",
+    "from torchvision.transforms import ToTensor, Normalize, CenterCrop\n",
+    "\n",
+    "# ── Настройки ─────────────────────────────────────────────────────────────────\n",
+    "IMG_PATH = \"1234.png\"\n",
+    "OUT_DIR  = \"vaetest\"\n",
+    "device   = \"cuda\"\n",
+    "dtype    = torch.float32\n",
+    "os.makedirs(OUT_DIR, exist_ok=True)\n",
+    "\n",
+    "VAES = {\n",
+    "    \"FLUX.2\":   (\"flux2\",    \"AiArtLab/sdxs-1b\"),\n",
+    "    \"vae32ch2\": (\"vae32ch\",  \"vae32ch2\"),\n",
+    "}\n",
+    "\n",
+    "# ── Patchify / Unpatchify ─────────────────────────────────────────────────────\n",
+    "def _patchify_latents(latents):\n",
+    "    B, C, H, W = latents.shape\n",
+    "    latents = latents.view(B, C, H // 2, 2, W // 2, 2)\n",
+    "    latents = latents.permute(0, 1, 3, 5, 2, 4)\n",
+    "    latents = latents.reshape(B, C * 4, H // 2, W // 2)\n",
+    "    return latents\n",
+    "\n",
+    "def _unpatchify_latents(latents):\n",
+    "    B, C, H, W = latents.shape\n",
+    "    latents = latents.reshape(B, C // 4, 2, 2, H, W)\n",
+    "    latents = latents.permute(0, 1, 4, 2, 5, 3)\n",
+    "    latents = latents.reshape(B, C // 4, H * 2, W * 2)\n",
+    "    return latents\n",
+    "\n",
+    "# ── Загрузка картинки ─────────────────────────────────────────────────────────\n",
+    "def load_image(path):\n",
+    "    img = Image.open(path).convert(\"RGB\")\n",
+    "    w, h = img.size\n",
+    "    img = CenterCrop((h // 8 * 8, w // 8 * 8))(img)\n",
+    "    tensor = ToTensor()(img).unsqueeze(0)\n",
+    "    tensor = Normalize(mean=[0.5]*3, std=[0.5]*3)(tensor)\n",
+    "    return img, tensor.to(device, dtype=dtype)\n",
+    "\n",
+    "def tensor_to_img(t):\n",
+    "    t = (t * 0.5 + 0.5).clamp(0, 1)\n",
+    "    return to_pil_image(t[0])\n",
+    "\n",
+    "# ── Статистика ────────────────────────────────────────────────────────────────\n",
+    "def logvariance(latents):\n",
+    "    return torch.log(latents.var() + 1e-8).item()\n",
+    "\n",
+    "def print_stats(name, latents):\n",
+    "    lv = logvariance(latents)\n",
+    "    print(f\"  log-variance : {lv:.4f}  (идеал ≈ 0.0)\")\n",
+    "    print(f\"  mean         : {latents.mean():.4f}\")\n",
+    "    print(f\"  std          : {latents.std():.4f}\")\n",
+    "    print(f\"  shape        : {latents.shape}\")\n",
+    "\n",
+    "def plot_latent_distribution(latents, title, save_path):\n",
+    "    from scipy.stats import probplot\n",
+    "    lat = latents.detach().cpu().float().numpy().flatten()\n",
+    "\n",
+    "    plt.figure(figsize=(10, 4))\n",
+    "\n",
+    "    plt.subplot(1, 2, 1)\n",
+    "    plt.hist(lat, bins=100, density=True, alpha=0.7, color=\"steelblue\")\n",
+    "    plt.title(f\"{title} histogram\")\n",
+    "    plt.xlabel(\"latent value\")\n",
+    "    plt.ylabel(\"density\")\n",
+    "\n",
+    "    plt.subplot(1, 2, 2)\n",
+    "    probplot(lat, dist=\"norm\", plot=plt)\n",
+    "    plt.title(f\"{title} QQ-plot\")\n",
+    "\n",
+    "    plt.tight_layout()\n",
+    "    plt.savefig(save_path)\n",
+    "    plt.close()\n",
+    "    print(f\"  график сохранён: {save_path}\")\n",
+    "\n",
+    "# ── Нормализация из конфига (per-channel для vae32ch) ────────────────────────\n",
+    "def make_norm_tensors(cfg, latent_channels, device, dtype):\n",
+    "    mean  = getattr(cfg, \"latents_mean\",   None)\n",
+    "    std   = getattr(cfg, \"latents_std\",    None)\n",
+    "    shift = getattr(cfg, \"shift_factor\",   0.0)\n",
+    "    scale = getattr(cfg, \"scaling_factor\", 1.0)\n",
+    "\n",
+    "    if mean is not None:\n",
+    "        mean = torch.tensor(mean, device=device, dtype=dtype).view(1, latent_channels, 1, 1)\n",
+    "    if std is not None:\n",
+    "        std  = torch.tensor(std,  device=device, dtype=dtype).view(1, latent_channels, 1, 1)\n",
+    "\n",
+    "    shift = torch.tensor(shift if shift else 0., device=device, dtype=dtype)\n",
+    "    scale = torch.tensor(scale, device=device, dtype=dtype)\n",
+    "    return mean, std, shift, scale\n",
+    "\n",
+    "# ── Основной цикл ─────────────────────────────────────────────────────────────\n",
+    "img, x = load_image(IMG_PATH)\n",
+    "img.save(os.path.join(OUT_DIR, \"original.png\"))\n",
+    "print(f\"Картинка загружена: {x.shape}\")\n",
+    "\n",
+    "for name, (kind, repo) in VAES.items():\n",
+    "    print(f\"\\n{'='*55}\")\n",
+    "    print(f\"VAE : {name}\")\n",
+    "    print(f\"repo: {repo}\")\n",
+    "\n",
+    "    # --- загружаем нужный класс ---\n",
+    "    if kind == \"flux2\":\n",
+    "        vae = AutoencoderKLFlux2.from_pretrained(\n",
+    "            repo, subfolder=\"vae\", torch_dtype=dtype\n",
+    "        ).to(device)\n",
+    "    else:\n",
+    "        vae = AutoencoderKL.from_pretrained(\n",
+    "            repo, torch_dtype=dtype\n",
+    "        ).to(device)\n",
+    "    vae.eval()\n",
+    "\n",
+    "    latent_channels = vae.config.latent_channels\n",
+    "    mean_t, std_t, shift_t, scale_t = make_norm_tensors(\n",
+    "        vae.config, latent_channels, device, dtype\n",
+    "    )\n",
+    "\n",
+    "    print(f\"latent_channels : {latent_channels}\")\n",
+    "    print(f\"scaling_factor  : {scale_t.item():.5f}\")\n",
+    "    print(f\"shift_factor    : {shift_t.item():.5f}\")\n",
+    "    print(f\"latents_mean    : {'да (' + str(latent_channels) + 'ch)' if mean_t is not None else 'нет'}\")\n",
+    "    print(f\"latents_std     : {'да (' + str(latent_channels) + 'ch)' if std_t  is not None else 'нет'}\")\n",
+    "\n",
+    "    with torch.no_grad():\n",
+    "\n",
+    "        # ── ENCODE ────────────────────────────────────────────────────────────\n",
+    "        latents = vae.encode(x).latent_dist.sample().to(dtype)\n",
+    "        print(f\"\\n[encode] raw latents: {latents.shape}\")\n",
+    "\n",
+    "        if kind == \"flux2\":\n",
+    "            # 32ch → patchify → 128ch\n",
+    "            latents_patched = _patchify_latents(latents)\n",
+    "            print(f\"[flux2]  patchify  : {latents.shape} → {latents_patched.shape}\")\n",
+    "\n",
+    "            # BN нормализация в 128-канальном пространстве\n",
+    "            bn_mean = vae.bn.running_mean.view(1, -1, 1, 1).to(device, dtype)\n",
+    "            bn_std  = torch.sqrt(\n",
+    "                vae.bn.running_var.view(1, -1, 1, 1) + vae.config.batch_norm_eps\n",
+    "            ).to(device, dtype)\n",
+    "            latents_normed = (latents_patched - bn_mean) / bn_std\n",
+    "            print(f\"[flux2]  BN norm   : mean={bn_mean.mean():.4f}  std={bn_std.mean():.4f}\")\n",
+    "\n",
+    "            # считаем статистику в 128ch нормализованном пространстве\n",
+    "            print(\"\\n[STATS] после BN нормализации (128ch):\")\n",
+    "            print_stats(name, latents_normed)\n",
+    "            #plot_latent_distribution(\n",
+    "            #    latents_normed,\n",
+    "            #    f\"{name}_latents\",\n",
+    "            #    os.path.join(OUT_DIR, f\"dist_{name}.png\")\n",
+    "            #)\n",
+    "\n",
+    "            # unpatchify → 32ch (для decode)\n",
+    "            latents = _unpatchify_latents(latents_normed)\n",
+    "\n",
+    "        else:  # vae32ch2\n",
+    "            # per-channel нормализация из конфига\n",
+    "            if mean_t is not None and std_t is not None:\n",
+    "                latents = (latents - mean_t) / std_t\n",
+    "            latents = (latents - shift_t) / scale_t\n",
+    "\n",
+    "            print(f\"\\n[STATS] после per-channel нормализации ({latent_channels}ch):\")\n",
+    "            print_stats(name, latents)\n",
+    "            #plot_latent_distribution(\n",
+    "            #    latents,\n",
+    "            #    f\"{name}_latents\",\n",
+    "            #    os.path.join(OUT_DIR, f\"dist_{name}.png\")\n",
+    "            #)\n",
+    "\n",
+    "        # ── DECODE ────────────────────────────────────────────────────────────\n",
+    "        if kind == \"flux2\":\n",
+    "            # patchify → denorm → unpatchify\n",
+    "            latents_patched  = _patchify_latents(latents)\n",
+    "            latents_denormed = latents_patched * bn_std + bn_mean\n",
+    "            latents          = _unpatchify_latents(latents_denormed)\n",
+    "            print(f\"\\n[flux2]  BN denorm + unpatchify: {latents.shape}\")\n",
+    "\n",
+    "        else:  # vae32ch2\n",
+    "            latents = latents * scale_t + shift_t\n",
+    "            if mean_t is not None and std_t is not None:\n",
+    "                latents = latents * std_t + mean_t\n",
+    "            print(f\"\\n[vae32ch2] denorm: {latents.shape}\")\n",
+    "\n",
+    "        rec = vae.decode(latents).sample\n",
+    "\n",
+    "    out_path = os.path.join(OUT_DIR, f\"decoded_{name}.png\")\n",
+    "    tensor_to_img(rec).save(out_path)\n",
+    "    print(f\"Сохранено: {out_path}\")\n",
+    "\n",
+    "print(f\"\\n{'='*55}\")\n",
+    "print(\"Готово\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c219c07b-8da2-4182-ace6-8c3cc63ae3b1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: scipy in /usr/local/lib/python3.12/dist-packages (1.17.1)\n",
+      "Requirement already satisfied: numpy<2.7,>=1.26.4 in /usr/local/lib/python3.12/dist-packages (from scipy) (2.4.0)\n",
+      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
+      "\u001b[0m"
+     ]
+    },
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'scipy'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mModuleNotFoundError\u001b[39m                       Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m      1\u001b[39m get_ipython().system(\u001b[33m'\u001b[39m\u001b[33mpip install --user scipy\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mscipy\u001b[39;00m\n\u001b[32m      4\u001b[39m \u001b[38;5;28mprint\u001b[39m(scipy.__version__)\n",
+      "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'scipy'"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install --user scipy\n",
+    "\n",
+    "import scipy\n",
+    "print(scipy.__version__)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "43a4e1bc-2b02-4604-b69e-1a5aa276b6ac",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

config.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.36.0",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": true,
+  "in_channels": 3,
+  "latent_channels": 32,
+  "latents_mean": [
+    -0.03542253375053406,
+    0.20086465775966644,
+    -0.016413161531090736,
+    -0.0956302210688591,
+    -0.2672063112258911,
+    0.2609933018684387,
+    -0.07806991040706635,
+    -0.48407721519470215,
+    0.21844269335269928,
+    -0.1122383326292038,
+    0.27197545766830444,
+    -0.18958772718906403,
+    0.18776826560497284,
+    0.0987580344080925,
+    0.2837068736553192,
+    -0.4486690163612366,
+    0.4816776514053345,
+    0.02947971224784851,
+    -0.1337375044822693,
+    -0.39750921726226807,
+    -0.08513020724058151,
+    -0.054023586213588715,
+    -0.3943594992160797,
+    0.23918119072914124,
+    -0.12466679513454437,
+    0.09935147315263748,
+    0.31858691573143005,
+    0.48585832118988037,
+    -0.6416525840759277,
+    -0.15164820849895477,
+    -0.4693508744239807,
+    -0.13071806728839874
+  ],
+  "latents_std": [
+    1.5792087316513062,
+    1.5769503116607666,
+    1.5864241123199463,
+    1.6454921960830688,
+    1.5336694717407227,
+    1.5587652921676636,
+    1.5838669538497925,
+    1.5659377574920654,
+    1.6860467195510864,
+    1.5192310810089111,
+    1.573639988899231,
+    1.5953549146652222,
+    1.5271092653274536,
+    1.6246271133422852,
+    1.7054023742675781,
+    1.607722282409668,
+    1.558642864227295,
+    1.5824549198150635,
+    1.6202995777130127,
+    1.6206320524215698,
+    1.6379750967025757,
+    1.6527063846588135,
+    1.498811960220337,
+    1.5706247091293335,
+    1.5854856967926025,
+    1.4828169345855713,
+    1.5693111419677734,
+    1.692481517791748,
+    1.6409776210784912,
+    1.6216280460357666,
+    1.6087706089019775,
+    1.5776633024215698
+  ],
+  "layers_per_block": 2,
+  "mid_block_add_attention": true,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 32,
+  "scaling_factor": 1.0,
+  "shift_factor": 0.0,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ],
+  "use_post_quant_conv": true,
+  "use_quant_conv": true
+}

diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6369e370ff02168a240a9ebfd47810dd7babb36f76b7d9999e5d78cb4a1976c2
+size 336212308

scale.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import torch
+import numpy as np
+from PIL import Image
+from diffusers import AutoencoderKL
+from tqdm import tqdm
+import pathlib
+# ── 1. Загружаем VAE ──────────────────────────────────────────────────────────
+vae = AutoencoderKL.from_pretrained("vae32ch", torch_dtype=torch.float32)
+vae.eval().cuda()
+vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)  # = 8
+# ── 2. Собираем все PNG рекурсивно ───────────────────────────────────────────
+dataset_path = pathlib.Path("/workspace/ds")
+image_paths  = sorted(dataset_path.rglob("*.png"))
+print(f"Найдено картинок: {len(image_paths)}")
+# Берём первые 3000
+image_paths = image_paths[:30000]
+# ── 3. Препроцессинг — кроп до кратного 8 без ресайза ────────────────────────
+def preprocess(path):
+    img = Image.open(path).convert("RGB")
+    w, h = img.size
+    new_w = (w // vae_scale_factor) * vae_scale_factor
+    new_h = (h // vae_scale_factor) * vae_scale_factor
+    if new_w != w or new_h != h:
+        left = (w - new_w) // 2
+        top  = (h - new_h) // 2
+        img  = img.crop((left, top, left + new_w, top + new_h))
+    x = torch.from_numpy(np.array(img).astype(np.float32) / 255.0)
+    x = x.permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
+    x = x * 2.0 - 1.0                    # [-1, 1]
+    return x
+# ── 4. Считаем статистику по каналам ─────────────────────────────────────────
+latent_channels = vae.config.latent_channels  # 32
+all_means = []  # [N, C]
+all_stds  = []  # [N, C]
+errors    = []
+with torch.no_grad():
+    for path in tqdm(image_paths, desc="Encoding"):
+        try:
+            x    = preprocess(path).cuda()
+            lat  = vae.encode(x).latent_dist.sample()          # [1, C, H, W]
+            flat = lat.squeeze(0).float().reshape(latent_channels, -1)  # [C, H*W]
+            all_means.append(flat.mean(dim=1).cpu())  # [C]
+            all_stds.append(flat.std(dim=1).cpu())    # [C]
+        except Exception as e:
+            errors.append((path, str(e)))
+if errors:
+    print(f"\nОшибки ({len(errors)}):")
+    for p, e in errors:
+        print(f"  {p}: {e}")
+mean = torch.stack(all_means).mean(dim=0)  # [C]
+std  = torch.stack(all_stds).mean(dim=0)   # [C]
+print(f"\nОбработано картинок: {len(all_means)}")
+print(f"\nlatents_mean ({latent_channels} каналов):")
+print(mean.tolist())
+print(f"\nlatents_std ({latent_channels} каналов):")
+print(std.tolist())
+# ── 5. Создаём новый VAE с той же архитектурой + scaling векторы ──────────────
+cfg = vae.config
+new_vae = AutoencoderKL(
+    in_channels        = cfg.in_channels,
+    out_channels       = cfg.out_channels,
+    latent_channels    = cfg.latent_channels,
+    block_out_channels = cfg.block_out_channels,
+    layers_per_block   = cfg.layers_per_block,
+    norm_num_groups    = cfg.norm_num_groups,
+    act_fn             = cfg.act_fn,
+    down_block_types   = cfg.down_block_types,
+    up_block_types     = cfg.up_block_types,
+)
+new_vae.eval()
+# Переносим веса
+result = new_vae.load_state_dict(vae.state_dict(), strict=False)
+print(f"\nВеса перенесены: {result}")
+# Прописываем scaling векторы в конфиг
+new_vae.register_to_config(
+    latents_mean   = mean.tolist(),
+    latents_std    = std.tolist(),
+    scaling_factor = 1.0,
+    shift_factor   = 0.0,
+)
+print(f"\nlatents_mean в конфиге: {new_vae.config.latents_mean[:4]}...")
+print(f"latents_std  в конфиге: {new_vae.config.latents_std[:4]}...")
+# ── 6. Сохраняем ──────────────────────────────────────────────────────────────
+new_vae.save_pretrained("vae32ch2")
+print("\nСохранено в vae32ch2/")