AiArtLab
/

simplevae

Diffusers

Safetensors

Model card Files Files and versions

xet

Community

recoilme committed on Sep 15, 2025

Commit

61df958

1 Parent(s): 1b8b52e

eval

Browse files

Files changed (2) hide show

.ipynb_checkpoints/test-checkpoint.ipynb +0 -95
README.md +30 -0

.ipynb_checkpoints/test-checkpoint.ipynb DELETED Viewed

@@ -1,95 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "4f62bfd9-5396-48e2-aac7-bdf639cab345",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "The config attributes {'block_out_channels': [128, 256, 512, 768, 768], 'force_upcast': False} were passed to AsymmetricAutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ok\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "\n",
-    "from torchvision import transforms, utils\n",
-    "\n",
-    "import diffusers\n",
-    "from diffusers import AsymmetricAutoencoderKL\n",
-    "\n",
-    "from diffusers.utils import load_image\n",
-    "\n",
-    "def crop_image_to_nearest_divisible_by_8(img):\n",
-    "    # Check if the image height and width are divisible by 8\n",
-    "    if img.shape[1] % 8 == 0 and img.shape[2] % 8 == 0:\n",
-    "        return img\n",
-    "    else:\n",
-    "        # Calculate the closest lower resolution divisible by 8\n",
-    "        new_height = img.shape[1] - (img.shape[1] % 8)\n",
-    "        new_width = img.shape[2] - (img.shape[2] % 8)\n",
-    "        \n",
-    "        # Use CenterCrop to crop the image\n",
-    "        transform = transforms.CenterCrop((new_height, new_width), interpolation=transforms.InterpolationMode.BILINEAR)\n",
-    "        img = transform(img).to(torch.float32).clamp(-1, 1)\n",
-    "        \n",
-    "        return img\n",
-    "        \n",
-    "to_tensor = transforms.ToTensor()\n",
-    "\n",
-    "device = \"cuda\"\n",
-    "dtype=torch.float16\n",
-    "vae = AsymmetricAutoencoderKL.from_pretrained(\"vae\",torch_dtype=dtype).to(device).eval()\n",
-    "\n",
-    "image = load_image(\"generated.png\")\n",
-    "\n",
-    "image = crop_image_to_nearest_divisible_by_8(to_tensor(image)).unsqueeze(0).to(device,dtype=dtype)\n",
-    "\n",
-    "upscaled_image = vae(image).sample\n",
-    "# Save the reconstructed image\n",
-    "utils.save_image(upscaled_image, \"test.png\")\n",
-    "print('ok')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7e3ad326-c410-44b6-a738-15b7f7e15075",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

README.md ADDED Viewed

	@@ -0,0 +1,30 @@

+=== Eval ===
+```
+SD15 VAE                   | MSE=2.732e-03 PSNR=28.10 LPIPS=0.147 Edge=0.206 KL=19.821 | Z[min/mean/max/std]=[-17.375, 0.072, 16.203, 0.900] | Skew[min/mean/max]=[-0.543, -0.126, 0.070] | Kurt[min/mean/max]=[-0.151, 1.228, 4.574]
+SDXL VAE fp16 fix          | MSE=2.018e-03 PSNR=29.67 LPIPS=0.124 Edge=0.188 KL=32.222 | Z[min/mean/max/std]=[-4.066, -0.014, 4.301, 0.861] | Skew[min/mean/max]=[-0.017, 0.105, 0.165] | Kurt[min/mean/max]=[-0.380, -0.228, -0.107]
+AiArtLab/sdxl_vae          | MSE=1.736e-03 PSNR=30.29 LPIPS=0.116 Edge=0.181 KL=32.222 | Z[min/mean/max/std]=[-4.066, -0.014, 4.301, 0.861] | Skew[min/mean/max]=[-0.017, 0.105, 0.165] | Kurt[min/mean/max]=[-0.380, -0.228, -0.107]
+LTX-Video VAE              | MSE=1.202e-03 PSNR=31.84 LPIPS=0.141 Edge=0.168 KL=6.656 | Z[min/mean/max/std]=[-5.043, 0.011, 4.969, 0.272] | Skew[min/mean/max]=[-0.542, -0.018, 0.411] | Kurt[min/mean/max]=[-0.576, 0.741, 1.843]
+Wan2.2-TI2V-5B             | MSE=7.782e-04 PSNR=34.25 LPIPS=0.052 Edge=0.121 KL=9.472 | Z[min/mean/max/std]=[-4.789, -0.012, 4.266, 0.375] | Skew[min/mean/max]=[-0.397, 0.022, 0.653] | Kurt[min/mean/max]=[-0.482, 0.006, 0.538]
+AiArtLab/wan16x_vae        | MSE=7.275e-04 PSNR=34.51 LPIPS=0.051 Edge=0.118 KL=9.472 | Z[min/mean/max/std]=[-4.789, -0.012, 4.266, 0.375] | Skew[min/mean/max]=[-0.397, 0.022, 0.653] | Kurt[min/mean/max]=[-0.482, 0.006, 0.538]
+Wan2.2-T2V-A14B            | MSE=7.073e-04 PSNR=34.59 LPIPS=0.048 Edge=0.115 KL=7.781 | Z[min/mean/max/std]=[-15.336, -0.159, 17.703, 2.563] | Skew[min/mean/max]=[-0.343, 0.006, 0.367] | Kurt[min/mean/max]=[-0.538, -0.071, 0.594]
+QwenImage                  | MSE=6.549e-04 PSNR=35.21 LPIPS=0.047 Edge=0.110 KL=7.776 | Z[min/mean/max/std]=[-15.297, -0.158, 17.688, 2.561] | Skew[min/mean/max]=[-0.346, 0.005, 0.368] | Kurt[min/mean/max]=[-0.538, -0.072, 0.597]
+AuraDiffusion/16ch-vae     | MSE=5.361e-04 PSNR=35.80 LPIPS=0.041 Edge=0.100 KL=4.421 | Z[min/mean/max/std]=[-1.373, -0.005, 1.621, 0.165] | Skew[min/mean/max]=[-0.331, 0.040, 0.413] | Kurt[min/mean/max]=[-0.170, 0.303, 0.670]
+FLUX.1-schnell VAE         | MSE=4.594e-04 PSNR=35.87 LPIPS=0.035 Edge=0.088 KL=13.016 | Z[min/mean/max/std]=[-5.824, -0.076, 6.246, 0.945] | Skew[min/mean/max]=[-0.268, 0.048, 0.483] | Kurt[min/mean/max]=[-0.498, 0.037, 0.568]
+AiArtLab/simplevae         | MSE=4.818e-04 PSNR=36.20 LPIPS=0.035 Edge=0.095 KL=4.032 | Z[min/mean/max/std]=[-7.762, -0.061, 9.914, 0.965] | Skew[min/mean/max]=[-0.320, 0.044, 0.411] | Kurt[min/mean/max]=[-0.045, 0.346, 0.696]
+```
+=== Percent (relative to SD15 VAE = 100%; higher is better) ===
+```
+| Model                      |      PSNR |     LPIPS |      Edge |
+|----------------------------|-----------|-----------|-----------|
+| SD15 VAE                   |      100% |      100% |      100% |
+| SDXL VAE fp16 fix          |    105.6% |    118.3% |    109.7% |
+| AiArtLab/sdxl_vae          |    107.8% |    126.8% |    113.8% |
+| LTX-Video VAE              |    113.3% |    103.8% |    122.5% |
+| Wan2.2-TI2V-5B             |    121.9% |    280.8% |    170.8% |
+| AiArtLab/wan16x_vae        |    122.8% |    287.3% |    174.2% |
+| Wan2.2-T2V-A14B            |    123.1% |    303.2% |    179.4% |
+| QwenImage                  |    125.3% |    308.8% |    188.0% |
+| AuraDiffusion/16ch-vae     |    127.4% |    355.5% |    206.6% |
+| FLUX.1-schnell VAE         |    127.6% |    424.4% |    234.8% |
+| AiArtLab/simplevae         |    128.8% |    415.2% |    217.7% |
+```