Spaces:

fffiloni
/

PASD

Running on Zero

App Files Community

fffiloni committed on 24 days ago

Commit

7ab1133

verified ·

1 Parent(s): 29f5a64

Update app_zero.py

Browse files

Files changed (1) hide show

app_zero.py +83 -63

app_zero.py CHANGED Viewed

@@ -12,6 +12,7 @@ import huggingface_hub
 if not hasattr(huggingface_hub, "cached_download"):
     def cached_download(*args, **kwargs):
         return huggingface_hub.hf_hub_download(*args, **kwargs)
     huggingface_hub.cached_download = cached_download
 import torch
@@ -19,12 +20,10 @@ import numpy as np
 import einops
 import spaces
 import gradio as gr
 from PIL import Image
 from torchvision import transforms
 import torch.nn.functional as F
 from torchvision.models import resnet50, ResNet50_Weights
 from pytorch_lightning import seed_everything
 from transformers import CLIPTextModel, CLIPTokenizer, CLIPImageProcessor
 from diffusers import (
@@ -78,7 +77,6 @@ huggingface_hub.hf_hub_download(
 # -------------------------------------------------------------------
 sys.path.append("./PASD")
 # -------------------------------------------------------------------
 # Runtime patching helpers
 # -------------------------------------------------------------------
@@ -130,7 +128,6 @@ except Exception:
             pass
 """
     original = text
     # Enlève d'anciens imports simples
@@ -147,7 +144,7 @@ except Exception:
     # Enlève d'anciens blocs try/except cassés liés à ce mixin
     text = re.sub(
-        r"(?ms)^try:\n(?:(?:    |\t).*\n)+?except Exception:\n(?:(?:    |\t).*\n)+?(?=^(?:class|def|@|from |import |\Z))",
         lambda m: "" if "FromOriginalControl" in m.group(0) else m.group(0),
         text,
     )
@@ -189,10 +186,10 @@ def patch_pasd_for_diffusers() -> None:
     patch_file(
         "./PASD/models/pasd/unet_2d_condition.py",
         [
-            ("    PositionNet,\n", ""),
             (
-                "    GLIGENTextBoundingboxProjection,\n",
-                "    GLIGENTextBoundingboxProjection as PositionNet,\n",
             ),
         ],
     )
@@ -251,25 +248,33 @@ weight_dtype = torch.float16
 device = "cuda"
 scheduler = UniPCMultistepScheduler.from_pretrained(
-    pretrained_model_path, subfolder="scheduler"
 )
 text_encoder = CLIPTextModel.from_pretrained(
-    pretrained_model_path, subfolder="text_encoder"
 )
 tokenizer = CLIPTokenizer.from_pretrained(
-    pretrained_model_path, subfolder="tokenizer"
 )
 vae = AutoencoderKL.from_pretrained(
-    pretrained_model_path, subfolder="vae"
 )
 feature_extractor = CLIPImageProcessor.from_pretrained(
-    pretrained_model_path, subfolder="feature_extractor"
 )
 unet = UNet2DConditionModel.from_pretrained(
-    ckpt_path, subfolder="unet"
 )
 controlnet = ControlNetModel.from_pretrained(
-    ckpt_path, subfolder="controlnet"
 )
 vae.requires_grad_(False)
@@ -278,7 +283,10 @@ unet.requires_grad_(False)
 controlnet.requires_grad_(False)
 unet, vae, text_encoder = load_dreambooth_lora(
-    unet, vae, text_encoder, dreambooth_lora_path
 )
 text_encoder.to(device, dtype=weight_dtype)
@@ -317,18 +325,37 @@ def resize_image(image_path: str, target_height: int) -> Image.Image:
 @spaces.GPU(enable_queue=True)
-def inference(
     input_image,
     prompt,
-    a_prompt,
-    n_prompt,
     denoise_steps,
     upscale,
     alpha,
-    cfg,
     seed,
-    progress=gr.Progress(track_tqdm=True)
 ):
     if seed == -1:
         seed = 0
@@ -351,17 +378,18 @@ def inference(
         if score >= 0.1:
             prompt += f"{category_name}" if prompt == "" else f", {category_name}"
-        prompt = a_prompt if prompt == "" else f"{prompt}, {a_prompt}"
         ori_width, ori_height = input_image.size
         rscale = upscale
         input_image = input_image.resize(
             (input_image.size[0] * rscale, input_image.size[1] * rscale)
         )
         input_image = input_image.resize(
             (input_image.size[0] // 8 * 8, input_image.size[1] // 8 * 8)
         )
         width, height = input_image.size
         try:
@@ -373,14 +401,15 @@ def inference(
                 generator=generator,
                 height=height,
                 width=width,
-                guidance_scale=cfg,
-                negative_prompt=n_prompt,
                 conditioning_scale=alpha,
                 eta=0.0,
             ).images[0]
             image = wavelet_color_fix(image, input_image)
             image = image.resize((ori_width * rscale, ori_height * rscale))
         except Exception as e:
             print(f"[inference] error: {e}")
             image = Image.new(mode="RGB", size=(512, 512))
@@ -412,23 +441,14 @@ css = """
 with gr.Blocks() as demo:
     with gr.Column(elem_id="col-container"):
-        gr.HTML("""
-        <h2 style="text-align: center;">
-            PASD Magnify
-        </h2>
-        <p style="text-align: center;">
-            Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization
-        </p>
-        <p id="project-links" align="center">
-            <a href="https://github.com/yangxy/PASD"><img src="https://img.shields.io/badge/Project-Page-Green"></a>
-            <a href="https://huggingface.co/papers/2308.14469"><img src="https://img.shields.io/badge/Paper-Arxiv-red"></a>
-        </p>
-        <p style="margin:12px auto;display: flex;justify-content: center;">
-            <a href="https://huggingface.co/spaces/fffiloni/PASD?duplicate=true">
-                <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space">
-            </a>
-        </p>
-        """)
         with gr.Row():
             with gr.Column():
@@ -492,29 +512,29 @@ with gr.Blocks() as demo:
                 after_img = gr.Image(label="Result")
                 file_output = gr.File(label="Downloadable image result")
-    submit_btn.click(
-        fn=inference,
-        inputs=[
-            input_image,
-            prompt_in,
-            added_prompt,
-            neg_prompt,
-            denoise_steps,
-            upsample_scale,
-            condition_scale,
-            classifier_free_guidance,
-            seed,
-        ],
-        outputs=[
-            before_img,
-            after_img,
-            file_output,
-        ],
-        api_visibility="private",
-    )
 demo.queue(max_size=10).launch(
     ssr_mode=False,
-    mcp_server=False,
     css=css,
 )

 if not hasattr(huggingface_hub, "cached_download"):
     def cached_download(*args, **kwargs):
         return huggingface_hub.hf_hub_download(*args, **kwargs)
     huggingface_hub.cached_download = cached_download
 import torch
 import einops
 import spaces
 import gradio as gr
 from PIL import Image
 from torchvision import transforms
 import torch.nn.functional as F
 from torchvision.models import resnet50, ResNet50_Weights
 from pytorch_lightning import seed_everything
 from transformers import CLIPTextModel, CLIPTokenizer, CLIPImageProcessor
 from diffusers import (
 # -------------------------------------------------------------------
 sys.path.append("./PASD")
 # -------------------------------------------------------------------
 # Runtime patching helpers
 # -------------------------------------------------------------------
             pass
 """
     original = text
     # Enlève d'anciens imports simples
     # Enlève d'anciens blocs try/except cassés liés à ce mixin
     text = re.sub(
+        r"(?ms)^try:\n(?:(?: |\t).*\n)+?except Exception:\n(?:(?: |\t).*\n)+?(?=^(?:class|def|@|from |import |\Z))",
         lambda m: "" if "FromOriginalControl" in m.group(0) else m.group(0),
         text,
     )
     patch_file(
         "./PASD/models/pasd/unet_2d_condition.py",
         [
+            (" PositionNet,\n", ""),
             (
+                " GLIGENTextBoundingboxProjection,\n",
+                " GLIGENTextBoundingboxProjection as PositionNet,\n",
             ),
         ],
     )
 device = "cuda"
 scheduler = UniPCMultistepScheduler.from_pretrained(
+    pretrained_model_path,
+    subfolder="scheduler",
 )
 text_encoder = CLIPTextModel.from_pretrained(
+    pretrained_model_path,
+    subfolder="text_encoder",
 )
 tokenizer = CLIPTokenizer.from_pretrained(
+    pretrained_model_path,
+    subfolder="tokenizer",
 )
 vae = AutoencoderKL.from_pretrained(
+    pretrained_model_path,
+    subfolder="vae",
 )
 feature_extractor = CLIPImageProcessor.from_pretrained(
+    pretrained_model_path,
+    subfolder="feature_extractor",
 )
 unet = UNet2DConditionModel.from_pretrained(
+    ckpt_path,
+    subfolder="unet",
 )
 controlnet = ControlNetModel.from_pretrained(
+    ckpt_path,
+    subfolder="controlnet",
 )
 vae.requires_grad_(False)
 controlnet.requires_grad_(False)
 unet, vae, text_encoder = load_dreambooth_lora(
+    unet,
+    vae,
+    text_encoder,
+    dreambooth_lora_path,
 )
 text_encoder.to(device, dtype=weight_dtype)
 @spaces.GPU(enable_queue=True)
+def super_resolve_image(
     input_image,
     prompt,
+    added_prompt,
+    negative_prompt,
     denoise_steps,
     upscale,
     alpha,
+    guidance_scale,
     seed,
+    progress=gr.Progress(track_tqdm=True),
 ):
+    """
+    Super-resolve an input image with PASD and optional prompt guidance.
+    Use this tool when you need to generate a higher-resolution restored image from an input image.
+    Args:
+        input_image (str): File path to the input image.
+        prompt (str): Main text prompt describing the desired image content.
+        added_prompt (str): Additional quality or style prompt appended to the main prompt.
+        negative_prompt (str): Negative prompt describing unwanted visual qualities.
+        denoise_steps (int): Number of denoising steps used by the diffusion pipeline.
+        upscale (int): Integer upscale factor applied to the image.
+        alpha (float): Conditioning scale passed to the ControlNet pipeline.
+        guidance_scale (float): Classifier-free guidance scale passed to the diffusion pipeline.
+        seed (int): Random seed, where -1 is converted to 0.
+    Returns:
+        tuple: Input image path, result image path, and downloadable result image path.
+    """
     if seed == -1:
         seed = 0
         if score >= 0.1:
             prompt += f"{category_name}" if prompt == "" else f", {category_name}"
+        prompt = added_prompt if prompt == "" else f"{prompt}, {added_prompt}"
         ori_width, ori_height = input_image.size
         rscale = upscale
         input_image = input_image.resize(
             (input_image.size[0] * rscale, input_image.size[1] * rscale)
         )
         input_image = input_image.resize(
             (input_image.size[0] // 8 * 8, input_image.size[1] // 8 * 8)
         )
         width, height = input_image.size
         try:
                 generator=generator,
                 height=height,
                 width=width,
+                guidance_scale=guidance_scale,
+                negative_prompt=negative_prompt,
                 conditioning_scale=alpha,
                 eta=0.0,
             ).images[0]
             image = wavelet_color_fix(image, input_image)
             image = image.resize((ori_width * rscale, ori_height * rscale))
         except Exception as e:
             print(f"[inference] error: {e}")
             image = Image.new(mode="RGB", size=(512, 512))
 with gr.Blocks() as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("""
+##  PASD Magnify
+Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization
+ <a href='https://arxiv.org/abs/2308.14469' target='_blank'><img src='https://img.shields.io/badge/arXiv-2308.14469-red'></a> <a href='https://github.com/yangxy/PASD' target='_blank'><img src='https://img.shields.io/badge/GitHub-Code-blue'></a>
+""")
         with gr.Row():
             with gr.Column():
                 after_img = gr.Image(label="Result")
                 file_output = gr.File(label="Downloadable image result")
+        submit_btn.click(
+            fn=super_resolve_image,
+            inputs=[
+                input_image,
+                prompt_in,
+                added_prompt,
+                neg_prompt,
+                denoise_steps,
+                upsample_scale,
+                condition_scale,
+                classifier_free_guidance,
+                seed,
+            ],
+            outputs=[
+                before_img,
+                after_img,
+                file_output,
+            ],
+            api_visibility="public",
+        )
 demo.queue(max_size=10).launch(
     ssr_mode=False,
+    mcp_server=True,
     css=css,
 )