fffiloni committed on
Commit
ea08edb
·
verified ·
1 Parent(s): 0e55432

Update app_zero.py

Browse files
Files changed (1) hide show
  1. app_zero.py +155 -100
app_zero.py CHANGED
@@ -1,55 +1,70 @@
1
- import torch
 
2
  import types
3
- torch.cuda.get_device_capability = lambda *args, **kwargs: (8, 6)
4
- torch.cuda.get_device_properties = lambda *args, **kwargs: types.SimpleNamespace(name='NVIDIA A10G', major=8, minor=6, total_memory=23836033024, multi_processor_count=80)
 
5
 
 
 
 
 
 
6
  import huggingface_hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  huggingface_hub.snapshot_download(
8
- repo_id='camenduru/PASD',
9
  allow_patterns=[
10
- 'pasd/**',
11
- 'pasd_light/**',
12
- 'pasd_light_rrdb/**',
13
- 'pasd_rrdb/**',
14
  ],
15
- local_dir='PASD/runs',
16
- #local_dir_use_symlinks=False,
17
  )
 
18
  huggingface_hub.hf_hub_download(
19
- repo_id='camenduru/PASD',
20
- filename='majicmixRealistic_v6.safetensors',
21
- local_dir='PASD/checkpoints/personalized_models',
22
- #local_dir_use_symlinks=False,
23
  )
 
24
  huggingface_hub.hf_hub_download(
25
- repo_id='akhaliq/RetinaFace-R50',
26
- filename='RetinaFace-R50.pth',
27
- local_dir='PASD/annotator/ckpts',
28
- #local_dir_use_symlinks=False,
29
  )
30
 
31
- import sys; sys.path.append('./PASD')
32
- import spaces
33
- import os
34
- import datetime
35
- import einops
36
- import gradio as gr
37
- from gradio_imageslider import ImageSlider
38
- import numpy as np
39
- import torch
40
- import random
41
- from PIL import Image
42
- from pathlib import Path
43
- from torchvision import transforms
44
- import torch.nn.functional as F
45
- from torchvision.models import resnet50, ResNet50_Weights
46
-
47
- from pytorch_lightning import seed_everything
48
- from transformers import CLIPTextModel, CLIPTokenizer, CLIPImageProcessor
49
- from diffusers import AutoencoderKL, DDIMScheduler, PNDMScheduler, DPMSolverMultistepScheduler, UniPCMultistepScheduler
50
 
51
- # compat shim for older custom pipelines expecting diffusers.pipeline_utils
52
- import sys
53
  try:
54
  import diffusers.pipelines.pipeline_utils as _pipeline_utils
55
  sys.modules["diffusers.pipeline_utils"] = _pipeline_utils
@@ -61,6 +76,7 @@ from myutils.misc import load_dreambooth_lora, rand_name
61
  from myutils.wavelet_color_fix import wavelet_color_fix
62
  from annotator.retinaface import RetinaFaceDetection
63
 
 
64
  use_pasd_light = False
65
  face_detector = RetinaFaceDetection()
66
 
@@ -73,12 +89,12 @@ else:
73
 
74
  pretrained_model_path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
75
  ckpt_path = "PASD/runs/pasd/checkpoint-100000"
76
- #dreambooth_lora_path = "checkpoints/personalized_models/toonyou_beta3.safetensors"
77
  dreambooth_lora_path = "PASD/checkpoints/personalized_models/majicmixRealistic_v6.safetensors"
78
- #dreambooth_lora_path = "checkpoints/personalized_models/Realistic_Vision_V5.1.safetensors"
79
  weight_dtype = torch.float16
80
  device = "cuda"
81
 
 
82
  scheduler = UniPCMultistepScheduler.from_pretrained(pretrained_model_path, subfolder="scheduler")
83
  text_encoder = CLIPTextModel.from_pretrained(pretrained_model_path, subfolder="text_encoder")
84
  tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_path, subfolder="tokenizer")
@@ -86,6 +102,7 @@ vae = AutoencoderKL.from_pretrained(pretrained_model_path, subfolder="vae")
86
  feature_extractor = CLIPImageProcessor.from_pretrained(pretrained_model_path, subfolder="feature_extractor")
87
  unet = UNet2DConditionModel.from_pretrained(ckpt_path, subfolder="unet")
88
  controlnet = ControlNetModel.from_pretrained(ckpt_path, subfolder="controlnet")
 
89
  vae.requires_grad_(False)
90
  text_encoder.requires_grad_(False)
91
  unet.requires_grad_(False)
@@ -99,101 +116,106 @@ unet.to(device, dtype=weight_dtype)
99
  controlnet.to(device, dtype=weight_dtype)
100
 
101
  validation_pipeline = StableDiffusionControlNetPipeline(
102
- vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, feature_extractor=feature_extractor,
103
- unet=unet, controlnet=controlnet, scheduler=scheduler, safety_checker=None, requires_safety_checker=False,
104
- )
105
- #validation_pipeline.enable_vae_tiling()
 
 
 
 
 
 
 
 
106
  validation_pipeline._init_tiled_vae(decoder_tile_size=224)
107
 
 
108
  weights = ResNet50_Weights.DEFAULT
109
  preprocess = weights.transforms()
110
  resnet = resnet50(weights=weights)
111
  resnet.eval()
112
 
 
113
  def resize_image(image_path, target_height):
114
- # Open the image file
115
  with Image.open(image_path) as img:
116
- # Calculate the ratio to resize the image to the target height
117
  ratio = target_height / float(img.size[1])
118
- # Calculate the new width based on the aspect ratio
119
  new_width = int(float(img.size[0]) * ratio)
120
- # Resize the image
121
  resized_img = img.resize((new_width, target_height), Image.LANCZOS)
122
- # Save the resized image
123
- #resized_img.save(output_path)
124
  return resized_img
125
 
 
126
  @spaces.GPU(enable_queue=True)
127
  def inference(input_image, prompt, a_prompt, n_prompt, denoise_steps, upscale, alpha, cfg, seed):
128
-
129
- #tempo fix for seed equals-1
130
  if seed == -1:
131
  seed = 0
132
-
133
  input_image = resize_image(input_image, 512)
134
  process_size = 768
135
  resize_preproc = transforms.Compose([
136
  transforms.Resize(process_size, interpolation=transforms.InterpolationMode.BILINEAR),
137
  ])
138
-
139
- # Get the current timestamp
140
  timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
141
 
142
  with torch.no_grad():
143
  seed_everything(seed)
144
  generator = torch.Generator(device=device)
 
145
 
146
- input_image = input_image.convert('RGB')
147
  batch = preprocess(input_image).unsqueeze(0)
148
  prediction = resnet(batch).squeeze(0).softmax(0)
149
  class_id = prediction.argmax().item()
150
  score = prediction[class_id].item()
151
  category_name = weights.meta["categories"][class_id]
 
152
  if score >= 0.1:
153
- prompt += f"{category_name}" if prompt=='' else f", {category_name}"
154
 
155
- prompt = a_prompt if prompt=='' else f"{prompt}, {a_prompt}"
156
 
157
  ori_width, ori_height = input_image.size
158
  resize_flag = False
159
 
160
  rscale = upscale
161
- input_image = input_image.resize((input_image.size[0]*rscale, input_image.size[1]*rscale))
162
-
163
- #if min(validation_image.size) < process_size:
164
- # validation_image = resize_preproc(validation_image)
165
 
166
- input_image = input_image.resize((input_image.size[0]//8*8, input_image.size[1]//8*8))
167
  width, height = input_image.size
168
- resize_flag = True #
169
 
170
  try:
171
  image = validation_pipeline(
172
- None, prompt, input_image, num_inference_steps=denoise_steps, generator=generator, height=height, width=width, guidance_scale=cfg,
173
- negative_prompt=n_prompt, conditioning_scale=alpha, eta=0.0,
174
- ).images[0]
175
-
176
- if True: #alpha<1.0:
177
- image = wavelet_color_fix(image, input_image)
178
-
179
- if resize_flag:
180
- image = image.resize((ori_width*rscale, ori_height*rscale))
 
 
 
 
 
 
 
 
181
  except Exception as e:
182
  print(e)
183
  image = Image.new(mode="RGB", size=(512, 512))
184
-
185
- # Convert and save the image as JPEG
186
- image.save(f'result_{timestamp}.jpg', 'JPEG')
187
 
188
- # Convert and save the image as JPEG
189
- input_image.save(f'input_{timestamp}.jpg', 'JPEG')
190
-
191
- return f"input_{timestamp}.jpg", f"result_{timestamp}.jpg", f"result_{timestamp}.jpg"
 
 
 
192
 
193
- title = "Pixel-Aware Stable Diffusion for Real-ISR"
194
- description = "Gradio Demo for PASD Real-ISR. To use it, simply upload your image, or click one of the examples to load them."
195
- article = "<a href='https://github.com/yangxy/PASD' target='_blank'>Github Repo Pytorch</a>"
196
- #examples=[['samples/27d38eeb2dbbe7c9.png'],['samples/629e4da70703193b.png']]
197
 
198
  css = """
199
  #col-container{
@@ -221,9 +243,9 @@ with gr.Blocks() as demo:
221
  Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization
222
  </p>
223
  <p id="project-links" align="center">
224
- <a href='https://github.com/yangxy/PASD'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
225
- <a href='https://huggingface.co/papers/2308.14469'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
226
- </p>
227
  <p style="margin:12px auto;display: flex;justify-content: center;">
228
  <a href="https://huggingface.co/spaces/fffiloni/PASD?duplicate=true">
229
  <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space">
@@ -233,16 +255,41 @@ with gr.Blocks() as demo:
233
 
234
  with gr.Row():
235
  with gr.Column():
236
- input_image = gr.Image(type="filepath", sources=["upload"], value="PASD/samples/frog.png")
 
 
 
 
 
237
  prompt_in = gr.Textbox(label="Prompt", value="Frog")
 
238
  with gr.Accordion(label="Advanced settings", open=False):
239
- added_prompt = gr.Textbox(label="Added Prompt", value='clean, high-resolution, 8k, best quality, masterpiece')
240
- neg_prompt = gr.Textbox(label="Negative Prompt", value='dotted, noise, blur, lowres, oversmooth, longbody, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
 
 
 
 
 
 
241
  denoise_steps = gr.Slider(label="Denoise Steps", minimum=10, maximum=50, value=20, step=1)
242
  upsample_scale = gr.Slider(label="Upsample Scale", minimum=1, maximum=4, value=2, step=1)
243
  condition_scale = gr.Slider(label="Conditioning Scale", minimum=0.5, maximum=1.5, value=1.1, step=0.1)
244
- classifier_free_guidance = gr.Slider(label="Classifier-free Guidance", minimum=0.1, maximum=10.0, value=7.5, step=0.1)
245
- seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  submit_btn = gr.Button("Submit")
247
 
248
  with gr.Column():
@@ -253,18 +300,26 @@ with gr.Blocks() as demo:
253
  submit_btn.click(
254
  fn=inference,
255
  inputs=[
256
- input_image, prompt_in,
257
- added_prompt, neg_prompt,
 
 
258
  denoise_steps,
259
- upsample_scale, condition_scale,
260
- classifier_free_guidance, seed
 
 
261
  ],
262
  outputs=[
263
  before_img,
264
  after_img,
265
- file_output
266
  ],
267
- api_visibility="private"
268
  )
269
 
270
- demo.queue(max_size=10).launch(ssr_mode=False, mcp_server=False, css=css)
 
 
 
 
 
1
+ import sys
2
+ import os
3
  import types
4
+ import random
5
+ import datetime
6
+ from pathlib import Path
7
 
8
+ import torch
9
+ import numpy as np
10
+ import einops
11
+ import spaces
12
+ import gradio as gr
13
  import huggingface_hub
14
+
15
+ from PIL import Image
16
+ from torchvision import transforms
17
+ import torch.nn.functional as F
18
+ from torchvision.models import resnet50, ResNet50_Weights
19
+
20
+ from pytorch_lightning import seed_everything
21
+ from transformers import CLIPTextModel, CLIPTokenizer, CLIPImageProcessor
22
+ from diffusers import (
23
+ AutoencoderKL,
24
+ DDIMScheduler,
25
+ PNDMScheduler,
26
+ DPMSolverMultistepScheduler,
27
+ UniPCMultistepScheduler,
28
+ )
29
+
30
+ # ---- GPU spoof for Spaces env compatibility ----
31
+ torch.cuda.get_device_capability = lambda *args, **kwargs: (8, 6)
32
+ torch.cuda.get_device_properties = lambda *args, **kwargs: types.SimpleNamespace(
33
+ name="NVIDIA A10G",
34
+ major=8,
35
+ minor=6,
36
+ total_memory=23836033024,
37
+ multi_processor_count=80,
38
+ )
39
+
40
+ # ---- Download required assets ----
41
  huggingface_hub.snapshot_download(
42
+ repo_id="camenduru/PASD",
43
  allow_patterns=[
44
+ "pasd/**",
45
+ "pasd_light/**",
46
+ "pasd_light_rrdb/**",
47
+ "pasd_rrdb/**",
48
  ],
49
+ local_dir="PASD/runs",
 
50
  )
51
+
52
  huggingface_hub.hf_hub_download(
53
+ repo_id="camenduru/PASD",
54
+ filename="majicmixRealistic_v6.safetensors",
55
+ local_dir="PASD/checkpoints/personalized_models",
 
56
  )
57
+
58
  huggingface_hub.hf_hub_download(
59
+ repo_id="akhaliq/RetinaFace-R50",
60
+ filename="RetinaFace-R50.pth",
61
+ local_dir="PASD/annotator/ckpts",
 
62
  )
63
 
64
+ # ---- Local PASD path ----
65
+ sys.path.append("./PASD")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ # ---- Compat shim for older custom pipelines expecting diffusers.pipeline_utils ----
 
68
  try:
69
  import diffusers.pipelines.pipeline_utils as _pipeline_utils
70
  sys.modules["diffusers.pipeline_utils"] = _pipeline_utils
 
76
  from myutils.wavelet_color_fix import wavelet_color_fix
77
  from annotator.retinaface import RetinaFaceDetection
78
 
79
+ # ---- Model selection ----
80
  use_pasd_light = False
81
  face_detector = RetinaFaceDetection()
82
 
 
89
 
90
  pretrained_model_path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
91
  ckpt_path = "PASD/runs/pasd/checkpoint-100000"
 
92
  dreambooth_lora_path = "PASD/checkpoints/personalized_models/majicmixRealistic_v6.safetensors"
93
+
94
  weight_dtype = torch.float16
95
  device = "cuda"
96
 
97
+ # ---- Load models ----
98
  scheduler = UniPCMultistepScheduler.from_pretrained(pretrained_model_path, subfolder="scheduler")
99
  text_encoder = CLIPTextModel.from_pretrained(pretrained_model_path, subfolder="text_encoder")
100
  tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_path, subfolder="tokenizer")
 
102
  feature_extractor = CLIPImageProcessor.from_pretrained(pretrained_model_path, subfolder="feature_extractor")
103
  unet = UNet2DConditionModel.from_pretrained(ckpt_path, subfolder="unet")
104
  controlnet = ControlNetModel.from_pretrained(ckpt_path, subfolder="controlnet")
105
+
106
  vae.requires_grad_(False)
107
  text_encoder.requires_grad_(False)
108
  unet.requires_grad_(False)
 
116
  controlnet.to(device, dtype=weight_dtype)
117
 
118
  validation_pipeline = StableDiffusionControlNetPipeline(
119
+ vae=vae,
120
+ text_encoder=text_encoder,
121
+ tokenizer=tokenizer,
122
+ feature_extractor=feature_extractor,
123
+ unet=unet,
124
+ controlnet=controlnet,
125
+ scheduler=scheduler,
126
+ safety_checker=None,
127
+ requires_safety_checker=False,
128
+ )
129
+
130
+ # validation_pipeline.enable_vae_tiling()
131
  validation_pipeline._init_tiled_vae(decoder_tile_size=224)
132
 
133
+ # ---- ResNet auto-tag helper ----
134
  weights = ResNet50_Weights.DEFAULT
135
  preprocess = weights.transforms()
136
  resnet = resnet50(weights=weights)
137
  resnet.eval()
138
 
139
+
140
  def resize_image(image_path, target_height):
 
141
  with Image.open(image_path) as img:
 
142
  ratio = target_height / float(img.size[1])
 
143
  new_width = int(float(img.size[0]) * ratio)
 
144
  resized_img = img.resize((new_width, target_height), Image.LANCZOS)
 
 
145
  return resized_img
146
 
147
+
148
  @spaces.GPU(enable_queue=True)
149
  def inference(input_image, prompt, a_prompt, n_prompt, denoise_steps, upscale, alpha, cfg, seed):
 
 
150
  if seed == -1:
151
  seed = 0
152
+
153
  input_image = resize_image(input_image, 512)
154
  process_size = 768
155
  resize_preproc = transforms.Compose([
156
  transforms.Resize(process_size, interpolation=transforms.InterpolationMode.BILINEAR),
157
  ])
158
+
 
159
  timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
160
 
161
  with torch.no_grad():
162
  seed_everything(seed)
163
  generator = torch.Generator(device=device)
164
+ generator.manual_seed(seed)
165
 
166
+ input_image = input_image.convert("RGB")
167
  batch = preprocess(input_image).unsqueeze(0)
168
  prediction = resnet(batch).squeeze(0).softmax(0)
169
  class_id = prediction.argmax().item()
170
  score = prediction[class_id].item()
171
  category_name = weights.meta["categories"][class_id]
172
+
173
  if score >= 0.1:
174
+ prompt += f"{category_name}" if prompt == "" else f", {category_name}"
175
 
176
+ prompt = a_prompt if prompt == "" else f"{prompt}, {a_prompt}"
177
 
178
  ori_width, ori_height = input_image.size
179
  resize_flag = False
180
 
181
  rscale = upscale
182
+ input_image = input_image.resize((input_image.size[0] * rscale, input_image.size[1] * rscale))
 
 
 
183
 
184
+ input_image = input_image.resize((input_image.size[0] // 8 * 8, input_image.size[1] // 8 * 8))
185
  width, height = input_image.size
186
+ resize_flag = True
187
 
188
  try:
189
  image = validation_pipeline(
190
+ None,
191
+ prompt,
192
+ input_image,
193
+ num_inference_steps=denoise_steps,
194
+ generator=generator,
195
+ height=height,
196
+ width=width,
197
+ guidance_scale=cfg,
198
+ negative_prompt=n_prompt,
199
+ conditioning_scale=alpha,
200
+ eta=0.0,
201
+ ).images[0]
202
+
203
+ image = wavelet_color_fix(image, input_image)
204
+
205
+ if resize_flag:
206
+ image = image.resize((ori_width * rscale, ori_height * rscale))
207
  except Exception as e:
208
  print(e)
209
  image = Image.new(mode="RGB", size=(512, 512))
 
 
 
210
 
211
+ result_path = f"result_{timestamp}.jpg"
212
+ input_path = f"input_{timestamp}.jpg"
213
+
214
+ image.save(result_path, "JPEG")
215
+ input_image.save(input_path, "JPEG")
216
+
217
+ return input_path, result_path, result_path
218
 
 
 
 
 
219
 
220
  css = """
221
  #col-container{
 
243
  Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization
244
  </p>
245
  <p id="project-links" align="center">
246
+ <a href="https://github.com/yangxy/PASD"><img src="https://img.shields.io/badge/Project-Page-Green"></a>
247
+ <a href="https://huggingface.co/papers/2308.14469"><img src="https://img.shields.io/badge/Paper-Arxiv-red"></a>
248
+ </p>
249
  <p style="margin:12px auto;display: flex;justify-content: center;">
250
  <a href="https://huggingface.co/spaces/fffiloni/PASD?duplicate=true">
251
  <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space">
 
255
 
256
  with gr.Row():
257
  with gr.Column():
258
+ input_image = gr.Image(
259
+ type="filepath",
260
+ sources=["upload"],
261
+ value="PASD/samples/frog.png",
262
+ label="Input image",
263
+ )
264
  prompt_in = gr.Textbox(label="Prompt", value="Frog")
265
+
266
  with gr.Accordion(label="Advanced settings", open=False):
267
+ added_prompt = gr.Textbox(
268
+ label="Added Prompt",
269
+ value="clean, high-resolution, 8k, best quality, masterpiece",
270
+ )
271
+ neg_prompt = gr.Textbox(
272
+ label="Negative Prompt",
273
+ value="dotted, noise, blur, lowres, oversmooth, longbody, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
274
+ )
275
  denoise_steps = gr.Slider(label="Denoise Steps", minimum=10, maximum=50, value=20, step=1)
276
  upsample_scale = gr.Slider(label="Upsample Scale", minimum=1, maximum=4, value=2, step=1)
277
  condition_scale = gr.Slider(label="Conditioning Scale", minimum=0.5, maximum=1.5, value=1.1, step=0.1)
278
+ classifier_free_guidance = gr.Slider(
279
+ label="Classifier-free Guidance",
280
+ minimum=0.1,
281
+ maximum=10.0,
282
+ value=7.5,
283
+ step=0.1,
284
+ )
285
+ seed = gr.Slider(
286
+ label="Seed",
287
+ minimum=-1,
288
+ maximum=2147483647,
289
+ step=1,
290
+ randomize=True,
291
+ )
292
+
293
  submit_btn = gr.Button("Submit")
294
 
295
  with gr.Column():
 
300
  submit_btn.click(
301
  fn=inference,
302
  inputs=[
303
+ input_image,
304
+ prompt_in,
305
+ added_prompt,
306
+ neg_prompt,
307
  denoise_steps,
308
+ upsample_scale,
309
+ condition_scale,
310
+ classifier_free_guidance,
311
+ seed,
312
  ],
313
  outputs=[
314
  before_img,
315
  after_img,
316
+ file_output,
317
  ],
318
+ api_visibility="private",
319
  )
320
 
321
+ demo.queue(max_size=10).launch(
322
+ ssr_mode=False,
323
+ mcp_server=False,
324
+ css=css,
325
+ )