eval
Browse files- .ipynb_checkpoints/test-checkpoint.ipynb +0 -95
- README.md +30 -0
.ipynb_checkpoints/test-checkpoint.ipynb
DELETED
|
@@ -1,95 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 1,
|
| 6 |
-
"id": "4f62bfd9-5396-48e2-aac7-bdf639cab345",
|
| 7 |
-
"metadata": {},
|
| 8 |
-
"outputs": [
|
| 9 |
-
{
|
| 10 |
-
"name": "stderr",
|
| 11 |
-
"output_type": "stream",
|
| 12 |
-
"text": [
|
| 13 |
-
"The config attributes {'block_out_channels': [128, 256, 512, 768, 768], 'force_upcast': False} were passed to AsymmetricAutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.\n"
|
| 14 |
-
]
|
| 15 |
-
},
|
| 16 |
-
{
|
| 17 |
-
"name": "stdout",
|
| 18 |
-
"output_type": "stream",
|
| 19 |
-
"text": [
|
| 20 |
-
"ok\n"
|
| 21 |
-
]
|
| 22 |
-
}
|
| 23 |
-
],
|
| 24 |
-
"source": [
|
| 25 |
-
"import torch\n",
|
| 26 |
-
"\n",
|
| 27 |
-
"from torchvision import transforms, utils\n",
|
| 28 |
-
"\n",
|
| 29 |
-
"import diffusers\n",
|
| 30 |
-
"from diffusers import AsymmetricAutoencoderKL\n",
|
| 31 |
-
"\n",
|
| 32 |
-
"from diffusers.utils import load_image\n",
|
| 33 |
-
"\n",
|
| 34 |
-
"def crop_image_to_nearest_divisible_by_8(img):\n",
|
| 35 |
-
" # Check if the image height and width are divisible by 8\n",
|
| 36 |
-
" if img.shape[1] % 8 == 0 and img.shape[2] % 8 == 0:\n",
|
| 37 |
-
" return img\n",
|
| 38 |
-
" else:\n",
|
| 39 |
-
" # Calculate the closest lower resolution divisible by 8\n",
|
| 40 |
-
" new_height = img.shape[1] - (img.shape[1] % 8)\n",
|
| 41 |
-
" new_width = img.shape[2] - (img.shape[2] % 8)\n",
|
| 42 |
-
" \n",
|
| 43 |
-
" # Use CenterCrop to crop the image\n",
|
| 44 |
-
" transform = transforms.CenterCrop((new_height, new_width), interpolation=transforms.InterpolationMode.BILINEAR)\n",
|
| 45 |
-
" img = transform(img).to(torch.float32).clamp(-1, 1)\n",
|
| 46 |
-
" \n",
|
| 47 |
-
" return img\n",
|
| 48 |
-
" \n",
|
| 49 |
-
"to_tensor = transforms.ToTensor()\n",
|
| 50 |
-
"\n",
|
| 51 |
-
"device = \"cuda\"\n",
|
| 52 |
-
"dtype=torch.float16\n",
|
| 53 |
-
"vae = AsymmetricAutoencoderKL.from_pretrained(\"vae\",torch_dtype=dtype).to(device).eval()\n",
|
| 54 |
-
"\n",
|
| 55 |
-
"image = load_image(\"generated.png\")\n",
|
| 56 |
-
"\n",
|
| 57 |
-
"image = crop_image_to_nearest_divisible_by_8(to_tensor(image)).unsqueeze(0).to(device,dtype=dtype)\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"upscaled_image = vae(image).sample\n",
|
| 60 |
-
"# Save the reconstructed image\n",
|
| 61 |
-
"utils.save_image(upscaled_image, \"test.png\")\n",
|
| 62 |
-
"print('ok')"
|
| 63 |
-
]
|
| 64 |
-
},
|
| 65 |
-
{
|
| 66 |
-
"cell_type": "code",
|
| 67 |
-
"execution_count": null,
|
| 68 |
-
"id": "7e3ad326-c410-44b6-a738-15b7f7e15075",
|
| 69 |
-
"metadata": {},
|
| 70 |
-
"outputs": [],
|
| 71 |
-
"source": []
|
| 72 |
-
}
|
| 73 |
-
],
|
| 74 |
-
"metadata": {
|
| 75 |
-
"kernelspec": {
|
| 76 |
-
"display_name": "Python 3 (ipykernel)",
|
| 77 |
-
"language": "python",
|
| 78 |
-
"name": "python3"
|
| 79 |
-
},
|
| 80 |
-
"language_info": {
|
| 81 |
-
"codemirror_mode": {
|
| 82 |
-
"name": "ipython",
|
| 83 |
-
"version": 3
|
| 84 |
-
},
|
| 85 |
-
"file_extension": ".py",
|
| 86 |
-
"mimetype": "text/x-python",
|
| 87 |
-
"name": "python",
|
| 88 |
-
"nbconvert_exporter": "python",
|
| 89 |
-
"pygments_lexer": "ipython3",
|
| 90 |
-
"version": "3.11.6"
|
| 91 |
-
}
|
| 92 |
-
},
|
| 93 |
-
"nbformat": 4,
|
| 94 |
-
"nbformat_minor": 5
|
| 95 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
=== Eval ===
|
| 2 |
+
```
|
| 3 |
+
SD15 VAE | MSE=2.732e-03 PSNR=28.10 LPIPS=0.147 Edge=0.206 KL=19.821 | Z[min/mean/max/std]=[-17.375, 0.072, 16.203, 0.900] | Skew[min/mean/max]=[-0.543, -0.126, 0.070] | Kurt[min/mean/max]=[-0.151, 1.228, 4.574]
|
| 4 |
+
SDXL VAE fp16 fix | MSE=2.018e-03 PSNR=29.67 LPIPS=0.124 Edge=0.188 KL=32.222 | Z[min/mean/max/std]=[-4.066, -0.014, 4.301, 0.861] | Skew[min/mean/max]=[-0.017, 0.105, 0.165] | Kurt[min/mean/max]=[-0.380, -0.228, -0.107]
|
| 5 |
+
AiArtLab/sdxl_vae | MSE=1.736e-03 PSNR=30.29 LPIPS=0.116 Edge=0.181 KL=32.222 | Z[min/mean/max/std]=[-4.066, -0.014, 4.301, 0.861] | Skew[min/mean/max]=[-0.017, 0.105, 0.165] | Kurt[min/mean/max]=[-0.380, -0.228, -0.107]
|
| 6 |
+
LTX-Video VAE | MSE=1.202e-03 PSNR=31.84 LPIPS=0.141 Edge=0.168 KL=6.656 | Z[min/mean/max/std]=[-5.043, 0.011, 4.969, 0.272] | Skew[min/mean/max]=[-0.542, -0.018, 0.411] | Kurt[min/mean/max]=[-0.576, 0.741, 1.843]
|
| 7 |
+
Wan2.2-TI2V-5B | MSE=7.782e-04 PSNR=34.25 LPIPS=0.052 Edge=0.121 KL=9.472 | Z[min/mean/max/std]=[-4.789, -0.012, 4.266, 0.375] | Skew[min/mean/max]=[-0.397, 0.022, 0.653] | Kurt[min/mean/max]=[-0.482, 0.006, 0.538]
|
| 8 |
+
AiArtLab/wan16x_vae | MSE=7.275e-04 PSNR=34.51 LPIPS=0.051 Edge=0.118 KL=9.472 | Z[min/mean/max/std]=[-4.789, -0.012, 4.266, 0.375] | Skew[min/mean/max]=[-0.397, 0.022, 0.653] | Kurt[min/mean/max]=[-0.482, 0.006, 0.538]
|
| 9 |
+
Wan2.2-T2V-A14B | MSE=7.073e-04 PSNR=34.59 LPIPS=0.048 Edge=0.115 KL=7.781 | Z[min/mean/max/std]=[-15.336, -0.159, 17.703, 2.563] | Skew[min/mean/max]=[-0.343, 0.006, 0.367] | Kurt[min/mean/max]=[-0.538, -0.071, 0.594]
|
| 10 |
+
QwenImage | MSE=6.549e-04 PSNR=35.21 LPIPS=0.047 Edge=0.110 KL=7.776 | Z[min/mean/max/std]=[-15.297, -0.158, 17.688, 2.561] | Skew[min/mean/max]=[-0.346, 0.005, 0.368] | Kurt[min/mean/max]=[-0.538, -0.072, 0.597]
|
| 11 |
+
AuraDiffusion/16ch-vae | MSE=5.361e-04 PSNR=35.80 LPIPS=0.041 Edge=0.100 KL=4.421 | Z[min/mean/max/std]=[-1.373, -0.005, 1.621, 0.165] | Skew[min/mean/max]=[-0.331, 0.040, 0.413] | Kurt[min/mean/max]=[-0.170, 0.303, 0.670]
|
| 12 |
+
FLUX.1-schnell VAE | MSE=4.594e-04 PSNR=35.87 LPIPS=0.035 Edge=0.088 KL=13.016 | Z[min/mean/max/std]=[-5.824, -0.076, 6.246, 0.945] | Skew[min/mean/max]=[-0.268, 0.048, 0.483] | Kurt[min/mean/max]=[-0.498, 0.037, 0.568]
|
| 13 |
+
AiArtLab/simplevae | MSE=4.818e-04 PSNR=36.20 LPIPS=0.035 Edge=0.095 KL=4.032 | Z[min/mean/max/std]=[-7.762, -0.061, 9.914, 0.965] | Skew[min/mean/max]=[-0.320, 0.044, 0.411] | Kurt[min/mean/max]=[-0.045, 0.346, 0.696]
|
| 14 |
+
```
|
| 15 |
+
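=== Percent (relative to SD15 VAE = 100%; PSNR is model/baseline, LPIPS and Edge are baseline/model, so higher is better) ===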
|
| 16 |
+
```
|
| 17 |
+
| Model | PSNR | LPIPS | Edge |
|
| 18 |
+
|----------------------------|-----------|-----------|-----------|
|
| 19 |
+
| SD15 VAE | 100% | 100% | 100% |
|
| 20 |
+
| SDXL VAE fp16 fix | 105.6% | 118.3% | 109.7% |
|
| 21 |
+
| AiArtLab/sdxl_vae | 107.8% | 126.8% | 113.8% |
|
| 22 |
+
| LTX-Video VAE | 113.3% | 103.8% | 122.5% |
|
| 23 |
+
| Wan2.2-TI2V-5B | 121.9% | 280.8% | 170.8% |
|
| 24 |
+
| AiArtLab/wan16x_vae | 122.8% | 287.3% | 174.2% |
|
| 25 |
+
| Wan2.2-T2V-A14B | 123.1% | 303.2% | 179.4% |
|
| 26 |
+
| QwenImage | 125.3% | 308.8% | 188.0% |
|
| 27 |
+
| AuraDiffusion/16ch-vae | 127.4% | 355.5% | 206.6% |
|
| 28 |
+
| FLUX.1-schnell VAE | 127.6% | 424.4% | 234.8% |
|
| 29 |
+
| AiArtLab/simplevae | 128.8% | 415.2% | 217.7% |
|
| 30 |
+
```
|