recoilme committed on
Commit
e46facc
·
1 Parent(s): 35b669a

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ generated.png filter=lfs diff=lfs merge=lfs -text
37
+ test.png filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/test-checkpoint.ipynb ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "4f62bfd9-5396-48e2-aac7-bdf639cab345",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "The config attributes {'block_out_channels': [128, 256, 512, 768, 768], 'force_upcast': False} were passed to AsymmetricAutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "ok\n"
21
+ ]
22
+ }
23
+ ],
24
+ "source": [
25
+ "import torch\n",
26
+ "\n",
27
+ "from torchvision import transforms, utils\n",
28
+ "\n",
29
+ "import diffusers\n",
30
+ "from diffusers import AsymmetricAutoencoderKL\n",
31
+ "\n",
32
+ "from diffusers.utils import load_image\n",
33
+ "\n",
34
+ "def crop_image_to_nearest_divisible_by_8(img):\n",
35
+ "    \"\"\"Center-crop img so shape[1] (height) and shape[2] (width) are multiples of 8; aligned inputs are returned as-is.\"\"\"\n",
36
+ "    if img.shape[1] % 8 == 0 and img.shape[2] % 8 == 0:\n",
37
+ "        return img\n",
38
+ "    else:\n",
39
+ "        # Round each side down to the nearest multiple of 8\n",
40
+ "        new_height = img.shape[1] - (img.shape[1] % 8)\n",
41
+ "        new_width = img.shape[2] - (img.shape[2] % 8)\n",
42
+ " \n",
43
+ "        # CenterCrop only slices pixels, so it accepts just the target size (no interpolation kwarg)\n",
44
+ "        transform = transforms.CenterCrop((new_height, new_width))\n",
45
+ "        img = transform(img).to(torch.float32).clamp(-1, 1)\n",
46
+ " \n",
47
+ "    return img\n",
48
+ " \n",
49
+ "to_tensor = transforms.ToTensor()\n",
50
+ "\n",
51
+ "device = \"cuda\"\n",
52
+ "dtype=torch.float16\n",
53
+ "vae = AsymmetricAutoencoderKL.from_pretrained(\"vae\",torch_dtype=dtype).to(device).eval()\n",
54
+ "\n",
55
+ "image = load_image(\"generated.png\")\n",
56
+ "\n",
57
+ "image = crop_image_to_nearest_divisible_by_8(to_tensor(image)).unsqueeze(0).to(device,dtype=dtype)\n",
58
+ "\n",
59
+ "upscaled_image = vae(image).sample\n",
60
+ "# Save the reconstructed image\n",
61
+ "utils.save_image(upscaled_image, \"test.png\")\n",
62
+ "print('ok')"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "id": "7e3ad326-c410-44b6-a738-15b7f7e15075",
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": []
72
+ }
73
+ ],
74
+ "metadata": {
75
+ "kernelspec": {
76
+ "display_name": "Python 3 (ipykernel)",
77
+ "language": "python",
78
+ "name": "python3"
79
+ },
80
+ "language_info": {
81
+ "codemirror_mode": {
82
+ "name": "ipython",
83
+ "version": 3
84
+ },
85
+ "file_extension": ".py",
86
+ "mimetype": "text/x-python",
87
+ "name": "python",
88
+ "nbconvert_exporter": "python",
89
+ "pygments_lexer": "ipython3",
90
+ "version": "3.11.6"
91
+ }
92
+ },
93
+ "nbformat": 4,
94
+ "nbformat_minor": 5
95
+ }
create.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
test.ipynb ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "4f62bfd9-5396-48e2-aac7-bdf639cab345",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "The config attributes {'block_out_channels': [128, 256, 512, 768, 768], 'force_upcast': False} were passed to AsymmetricAutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "ok\n"
21
+ ]
22
+ }
23
+ ],
24
+ "source": [
25
+ "import torch\n",
26
+ "\n",
27
+ "from torchvision import transforms, utils\n",
28
+ "\n",
29
+ "import diffusers\n",
30
+ "from diffusers import AsymmetricAutoencoderKL\n",
31
+ "\n",
32
+ "from diffusers.utils import load_image\n",
33
+ "\n",
34
+ "def crop_image_to_nearest_divisible_by_8(img):\n",
35
+ "    \"\"\"Center-crop img so shape[1] (height) and shape[2] (width) are multiples of 8; aligned inputs are returned as-is.\"\"\"\n",
36
+ "    if img.shape[1] % 8 == 0 and img.shape[2] % 8 == 0:\n",
37
+ "        return img\n",
38
+ "    else:\n",
39
+ "        # Round each side down to the nearest multiple of 8\n",
40
+ "        new_height = img.shape[1] - (img.shape[1] % 8)\n",
41
+ "        new_width = img.shape[2] - (img.shape[2] % 8)\n",
42
+ " \n",
43
+ "        # CenterCrop only slices pixels, so it accepts just the target size (no interpolation kwarg)\n",
44
+ "        transform = transforms.CenterCrop((new_height, new_width))\n",
45
+ "        img = transform(img).to(torch.float32).clamp(-1, 1)\n",
46
+ " \n",
47
+ "    return img\n",
48
+ " \n",
49
+ "to_tensor = transforms.ToTensor()\n",
50
+ "\n",
51
+ "device = \"cuda\"\n",
52
+ "dtype=torch.float16\n",
53
+ "vae = AsymmetricAutoencoderKL.from_pretrained(\"vae\",torch_dtype=dtype).to(device).eval()\n",
54
+ "\n",
55
+ "image = load_image(\"generated.png\")\n",
56
+ "\n",
57
+ "image = crop_image_to_nearest_divisible_by_8(to_tensor(image)).unsqueeze(0).to(device,dtype=dtype)\n",
58
+ "\n",
59
+ "upscaled_image = vae(image).sample\n",
60
+ "# Save the reconstructed image\n",
61
+ "utils.save_image(upscaled_image, \"test.png\")\n",
62
+ "print('ok')"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "id": "7e3ad326-c410-44b6-a738-15b7f7e15075",
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": []
72
+ }
73
+ ],
74
+ "metadata": {
75
+ "kernelspec": {
76
+ "display_name": "Python 3 (ipykernel)",
77
+ "language": "python",
78
+ "name": "python3"
79
+ },
80
+ "language_info": {
81
+ "codemirror_mode": {
82
+ "name": "ipython",
83
+ "version": 3
84
+ },
85
+ "file_extension": ".py",
86
+ "mimetype": "text/x-python",
87
+ "name": "python",
88
+ "nbconvert_exporter": "python",
89
+ "pygments_lexer": "ipython3",
90
+ "version": "3.11.6"
91
+ }
92
+ },
93
+ "nbformat": 4,
94
+ "nbformat_minor": 5
95
+ }
vae/config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AsymmetricAutoencoderKL",
3
+ "_diffusers_version": "0.33.1",
4
+ "_name_or_path": "simple_vae",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 768,
11
+ 768
12
+ ],
13
+ "down_block_out_channels": [
14
+ 128,
15
+ 256,
16
+ 512,
17
+ 512
18
+ ],
19
+ "down_block_types": [
20
+ "DownEncoderBlock2D",
21
+ "DownEncoderBlock2D",
22
+ "DownEncoderBlock2D",
23
+ "DownEncoderBlock2D"
24
+ ],
25
+ "force_upcast": false,
26
+ "in_channels": 3,
27
+ "latent_channels": 16,
28
+ "layers_per_down_block": 2,
29
+ "layers_per_up_block": 2,
30
+ "norm_num_groups": 32,
31
+ "out_channels": 3,
32
+ "sample_size": 1024,
33
+ "scaling_factor": 1,
34
+ "up_block_out_channels": [
35
+ 128,
36
+ 256,
37
+ 512,
38
+ 768,
39
+ 768
40
+ ],
41
+ "up_block_types": [
42
+ "UpDecoderBlock2D",
43
+ "UpDecoderBlock2D",
44
+ "UpDecoderBlock2D",
45
+ "UpDecoderBlock2D",
46
+ "UpDecoderBlock2D"
47
+ ]
48
+ }
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a09af5fe391d8095fd1937160c5990f1da40d3f83b4836f25ca43699c3729de9
3
+ size 349017470