Spaces:

fffiloni
/

PASD

Running on Zero

App Files Files Community

Restart please

by Alchemist85 - opened Nov 19, 2025

base: refs/heads/main

←

from: refs/pr/9

Discussion Files changed

+154

-446

This PR is in draft mode

Files changed (3) hide show

README.md +1 -1
app_zero.py +145 -430
requirements.txt +8 -15

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: ✨
 colorFrom: indigo
 colorTo: pink
 sdk: gradio
-sdk_version: 6.14.0
 app_file: app_zero.py
 pinned: false
 short_description: Magnify subject details and enhance image quality

 colorFrom: indigo
 colorTo: pink
 sdk: gradio
+sdk_version: 5.44.0
 app_file: app_zero.py
 pinned: false
 short_description: Magnify subject details and enhance image quality

app_zero.py CHANGED Viewed

@@ -1,229 +1,55 @@
-import sys
-import types
-import datetime
-import re
-from pathlib import Path
-import huggingface_hub
-# -------------------------------------------------------------------
-# Compatibility shim: older diffusers may still expect cached_download
-# -------------------------------------------------------------------
-if not hasattr(huggingface_hub, "cached_download"):
-    def cached_download(*args, **kwargs):
-        return huggingface_hub.hf_hub_download(*args, **kwargs)
-    huggingface_hub.cached_download = cached_download
 import torch
-import numpy as np
-import einops
-import spaces
-import gradio as gr
-from PIL import Image
-from torchvision import transforms
-import torch.nn.functional as F
-from torchvision.models import resnet50, ResNet50_Weights
-from pytorch_lightning import seed_everything
-from transformers import CLIPTextModel, CLIPTokenizer, CLIPImageProcessor
-from diffusers import (
-    AutoencoderKL,
-    DDIMScheduler,
-    PNDMScheduler,
-    DPMSolverMultistepScheduler,
-    UniPCMultistepScheduler,
-)
-# -------------------------------------------------------------------
-# GPU spoof for Spaces env compatibility
-# -------------------------------------------------------------------
 torch.cuda.get_device_capability = lambda *args, **kwargs: (8, 6)
-torch.cuda.get_device_properties = lambda *args, **kwargs: types.SimpleNamespace(
-    name="NVIDIA A10G",
-    major=8,
-    minor=6,
-    total_memory=23836033024,
-    multi_processor_count=80,
-)
-# -------------------------------------------------------------------
-# Download required assets
-# -------------------------------------------------------------------
 huggingface_hub.snapshot_download(
-    repo_id="camenduru/PASD",
     allow_patterns=[
-        "pasd/**",
-        "pasd_light/**",
-        "pasd_light_rrdb/**",
-        "pasd_rrdb/**",
     ],
-    local_dir="PASD/runs",
 )
 huggingface_hub.hf_hub_download(
-    repo_id="camenduru/PASD",
-    filename="majicmixRealistic_v6.safetensors",
-    local_dir="PASD/checkpoints/personalized_models",
 )
 huggingface_hub.hf_hub_download(
-    repo_id="akhaliq/RetinaFace-R50",
-    filename="RetinaFace-R50.pth",
-    local_dir="PASD/annotator/ckpts",
 )
-# -------------------------------------------------------------------
-# PASD local path
-# -------------------------------------------------------------------
-sys.path.append("./PASD")
-# -------------------------------------------------------------------
-# Runtime patching helpers
-# -------------------------------------------------------------------
-def patch_file(path_str: str, replacements: list[tuple[str, str]]) -> None:
-    path = Path(path_str)
-    if not path.exists():
-        print(f"[patch] file not found: {path}")
-        return
-    try:
-        text = path.read_text(encoding="utf-8")
-    except Exception as e:
-        print(f"[patch] failed reading {path}: {e}")
-        return
-    original = text
-    for old, new in replacements:
-        text = text.replace(old, new)
-    if text != original:
-        try:
-            path.write_text(text, encoding="utf-8")
-            print(f"[patch] updated: {path}")
-        except Exception as e:
-            print(f"[patch] failed writing {path}: {e}")
-    else:
-        print(f"[patch] no changes: {path}")
-def patch_controlnet_loader_import(path_str: str) -> None:
-    path = Path(path_str)
-    if not path.exists():
-        print(f"[patch] file not found: {path}")
-        return
-    try:
-        text = path.read_text(encoding="utf-8")
-    except Exception as e:
-        print(f"[patch] failed reading {path}: {e}")
-        return
-    safe_block = """try:
-    from diffusers.loaders import FromOriginalControlNetMixin as FromOriginalControlnetMixin
-except Exception:
-    try:
-        from diffusers.loaders import FromOriginalControlnetMixin
-    except Exception:
-        class FromOriginalControlnetMixin:
-            pass
-"""
-    original = text
-    # Enlève d'anciens imports simples
-    text = re.sub(
-        r"(?m)^from diffusers\.loaders[^\n]*FromOriginalControl\w*Mixin[^\n]*\n",
-        "",
-        text,
-    )
-    text = re.sub(
-        r"(?m)^from diffusers\.loaders\.single_file_model[^\n]*FromOriginal\w+[^\n]*\n",
-        "",
-        text,
-    )
-    # Enlève d'anciens blocs try/except cassés liés à ce mixin
-    text = re.sub(
-        r"(?ms)^try:\n(?:(?: |\t).*\n)+?except Exception:\n(?:(?: |\t).*\n)+?(?=^(?:class|def|@|from |import |\Z))",
-        lambda m: "" if "FromOriginalControl" in m.group(0) else m.group(0),
-        text,
-    )
-    # Normalise la référence de mixin dans le reste du fichier
-    text = text.replace("FromOriginalControlNetMixin", "FromOriginalControlnetMixin")
-    marker = "class ControlNetConditioningEmbedding"
-    if safe_block not in text:
-        idx = text.find(marker)
-        if idx != -1:
-            text = text[:idx] + safe_block + text[idx:]
-        else:
-            text = safe_block + text
-    if text != original:
-        try:
-            path.write_text(text, encoding="utf-8")
-            print(f"[patch] normalized: {path}")
-        except Exception as e:
-            print(f"[patch] failed writing {path}: {e}")
-    else:
-        print(f"[patch] no changes: {path}")
-def patch_pasd_for_diffusers() -> None:
-    # pipeline_utils path moved
-    patch_file(
-        "./PASD/pipelines/pipeline_pasd.py",
-        [
-            (
-                "from diffusers.pipeline_utils import DiffusionPipeline",
-                "from diffusers import DiffusionPipeline",
-            ),
-        ],
-    )
-    # PositionNet -> GLIGENTextBoundingboxProjection alias
-    patch_file(
-        "./PASD/models/pasd/unet_2d_condition.py",
-        [
-            (" PositionNet,\n", ""),
-            (
-                " GLIGENTextBoundingboxProjection,\n",
-                " GLIGENTextBoundingboxProjection as PositionNet,\n",
-            ),
-        ],
-    )
-    # internal module paths moved in newer diffusers
-    patch_file(
-        "./PASD/models/pasd/unet_2d_blocks.py",
-        [
-            (
-                "from diffusers.models.attention import AdaGroupNorm",
-                "from diffusers.models.normalization import AdaGroupNorm",
-            ),
-            (
-                "from diffusers.models.dual_transformer_2d import DualTransformer2DModel",
-                "from diffusers.models.transformers.dual_transformer_2d import DualTransformer2DModel",
-            ),
-            (
-                "from diffusers.models.transformer_2d import Transformer2DModel",
-                "from diffusers.models.transformers.transformer_2d import Transformer2DModel",
-            ),
-        ],
-    )
-    # robust controlnet patch
-    patch_controlnet_loader_import("./PASD/models/pasd/controlnet.py")
-patch_pasd_for_diffusers()
-# -------------------------------------------------------------------
-# Import PASD modules only after patching
-# -------------------------------------------------------------------
 from pipelines.pipeline_pasd import StableDiffusionControlNetPipeline
-from myutils.misc import load_dreambooth_lora
 from myutils.wavelet_color_fix import wavelet_color_fix
 from annotator.retinaface import RetinaFaceDetection
@@ -237,57 +63,27 @@ else:
     from models.pasd.unet_2d_condition import UNet2DConditionModel
     from models.pasd.controlnet import ControlNetModel
-# -------------------------------------------------------------------
-# Model setup
-# -------------------------------------------------------------------
-pretrained_model_path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
 ckpt_path = "PASD/runs/pasd/checkpoint-100000"
 dreambooth_lora_path = "PASD/checkpoints/personalized_models/majicmixRealistic_v6.safetensors"
 weight_dtype = torch.float16
 device = "cuda"
-scheduler = UniPCMultistepScheduler.from_pretrained(
-    pretrained_model_path,
-    subfolder="scheduler",
-)
-text_encoder = CLIPTextModel.from_pretrained(
-    pretrained_model_path,
-    subfolder="text_encoder",
-)
-tokenizer = CLIPTokenizer.from_pretrained(
-    pretrained_model_path,
-    subfolder="tokenizer",
-)
-vae = AutoencoderKL.from_pretrained(
-    pretrained_model_path,
-    subfolder="vae",
-)
-feature_extractor = CLIPImageProcessor.from_pretrained(
-    pretrained_model_path,
-    subfolder="feature_extractor",
-)
-unet = UNet2DConditionModel.from_pretrained(
-    ckpt_path,
-    subfolder="unet",
-)
-controlnet = ControlNetModel.from_pretrained(
-    ckpt_path,
-    subfolder="controlnet",
-)
 vae.requires_grad_(False)
 text_encoder.requires_grad_(False)
 unet.requires_grad_(False)
 controlnet.requires_grad_(False)
-unet, vae, text_encoder = load_dreambooth_lora(
-    unet,
-    vae,
-    text_encoder,
-    dreambooth_lora_path,
-)
 text_encoder.to(device, dtype=weight_dtype)
 vae.to(device, dtype=weight_dtype)
@@ -295,133 +91,101 @@ unet.to(device, dtype=weight_dtype)
 controlnet.to(device, dtype=weight_dtype)
 validation_pipeline = StableDiffusionControlNetPipeline(
-    vae=vae,
-    text_encoder=text_encoder,
-    tokenizer=tokenizer,
-    feature_extractor=feature_extractor,
-    unet=unet,
-    controlnet=controlnet,
-    scheduler=scheduler,
-    safety_checker=None,
-    requires_safety_checker=False,
-)
 validation_pipeline._init_tiled_vae(decoder_tile_size=224)
-# -------------------------------------------------------------------
-# ResNet helper
-# -------------------------------------------------------------------
 weights = ResNet50_Weights.DEFAULT
 preprocess = weights.transforms()
 resnet = resnet50(weights=weights)
 resnet.eval()
-def resize_image(image_path: str, target_height: int) -> Image.Image:
     with Image.open(image_path) as img:
         ratio = target_height / float(img.size[1])
         new_width = int(float(img.size[0]) * ratio)
-        return img.resize((new_width, target_height), Image.LANCZOS)
 @spaces.GPU(enable_queue=True)
-def super_resolve_image(
-    input_image,
-    prompt,
-    added_prompt,
-    negative_prompt,
-    denoise_steps,
-    upscale,
-    alpha,
-    guidance_scale,
-    seed,
-    progress=gr.Progress(track_tqdm=True),
-):
-    """
-    Super-resolve an input image with PASD and optional prompt guidance.
-    Use this tool when you need to generate a higher-resolution restored image from an input image.
-    Args:
-        input_image (str): File path to the input image.
-        prompt (str): Main text prompt describing the desired image content.
-        added_prompt (str): Additional quality or style prompt appended to the main prompt.
-        negative_prompt (str): Negative prompt describing unwanted visual qualities.
-        denoise_steps (int): Number of denoising steps used by the diffusion pipeline.
-        upscale (int): Integer upscale factor applied to the image.
-        alpha (float): Conditioning scale passed to the ControlNet pipeline.
-        guidance_scale (float): Classifier-free guidance scale passed to the diffusion pipeline.
-        seed (int): Random seed, where -1 is converted to 0.
-    Returns:
-        tuple: Input image path, result image path, and downloadable result image path.
-    """
     if seed == -1:
         seed = 0
     input_image = resize_image(input_image, 512)
     timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
     with torch.no_grad():
         seed_everything(seed)
         generator = torch.Generator(device=device)
-        generator.manual_seed(seed)
-        input_image = input_image.convert("RGB")
         batch = preprocess(input_image).unsqueeze(0)
         prediction = resnet(batch).squeeze(0).softmax(0)
         class_id = prediction.argmax().item()
         score = prediction[class_id].item()
         category_name = weights.meta["categories"][class_id]
         if score >= 0.1:
-            prompt += f"{category_name}" if prompt == "" else f", {category_name}"
-        prompt = added_prompt if prompt == "" else f"{prompt}, {added_prompt}"
         ori_width, ori_height = input_image.size
-        rscale = upscale
-        input_image = input_image.resize(
-            (input_image.size[0] * rscale, input_image.size[1] * rscale)
-        )
-        input_image = input_image.resize(
-            (input_image.size[0] // 8 * 8, input_image.size[1] // 8 * 8)
-        )
         width, height = input_image.size
         try:
             image = validation_pipeline(
-                None,
-                prompt,
-                input_image,
-                num_inference_steps=denoise_steps,
-                generator=generator,
-                height=height,
-                width=width,
-                guidance_scale=guidance_scale,
-                negative_prompt=negative_prompt,
-                conditioning_scale=alpha,
-                eta=0.0,
-            ).images[0]
-            image = wavelet_color_fix(image, input_image)
-            image = image.resize((ori_width * rscale, ori_height * rscale))
         except Exception as e:
-            print(f"[inference] error: {e}")
             image = Image.new(mode="RGB", size=(512, 512))
-    result_path = f"result_{timestamp}.jpg"
-    input_path = f"input_{timestamp}.jpg"
-    image.save(result_path, "JPEG")
-    input_image.save(input_path, "JPEG")
-    return input_path, result_path, result_path
 css = """
 #col-container{
@@ -439,102 +203,53 @@ css = """
 }
 """
-with gr.Blocks() as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("""
-##  PASD Magnify
-Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization
- <a href='https://arxiv.org/abs/2308.14469' target='_blank'><img src='https://img.shields.io/badge/arXiv-2308.14469-red'></a> <a href='https://github.com/yangxy/PASD' target='_blank'><img src='https://img.shields.io/badge/GitHub-Code-blue'></a>
-""")
         with gr.Row():
             with gr.Column():
-                input_image = gr.Image(
-                    type="filepath",
-                    sources=["upload"],
-                    value="PASD/samples/frog.png",
-                    label="Input image",
-                )
                 prompt_in = gr.Textbox(label="Prompt", value="Frog")
                 with gr.Accordion(label="Advanced settings", open=False):
-                    added_prompt = gr.Textbox(
-                        label="Added Prompt",
-                        value="clean, high-resolution, 8k, best quality, masterpiece",
-                    )
-                    neg_prompt = gr.Textbox(
-                        label="Negative Prompt",
-                        value="dotted, noise, blur, lowres, oversmooth, longbody, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
-                    )
-                    denoise_steps = gr.Slider(
-                        label="Denoise Steps",
-                        minimum=10,
-                        maximum=50,
-                        value=20,
-                        step=1,
-                    )
-                    upsample_scale = gr.Slider(
-                        label="Upsample Scale",
-                        minimum=1,
-                        maximum=4,
-                        value=2,
-                        step=1,
-                    )
-                    condition_scale = gr.Slider(
-                        label="Conditioning Scale",
-                        minimum=0.5,
-                        maximum=1.5,
-                        value=1.1,
-                        step=0.1,
-                    )
-                    classifier_free_guidance = gr.Slider(
-                        label="Classifier-free Guidance",
-                        minimum=0.1,
-                        maximum=10.0,
-                        value=7.5,
-                        step=0.1,
-                    )
-                    seed = gr.Slider(
-                        label="Seed",
-                        minimum=-1,
-                        maximum=2147483647,
-                        step=1,
-                        randomize=True,
-                    )
                 submit_btn = gr.Button("Submit")
             with gr.Column():
-                before_img = gr.Image(label="Input")
-                after_img = gr.Image(label="Result")
                 file_output = gr.File(label="Downloadable image result")
-        submit_btn.click(
-            fn=super_resolve_image,
-            inputs=[
-                input_image,
-                prompt_in,
-                added_prompt,
-                neg_prompt,
-                denoise_steps,
-                upsample_scale,
-                condition_scale,
-                classifier_free_guidance,
-                seed,
-            ],
-            outputs=[
-                before_img,
-                after_img,
-                file_output,
-            ],
-            api_visibility="public",
-        )
-demo.queue(max_size=10).launch(
-    ssr_mode=False,
-    mcp_server=True,
-    css=css,
-)

 import torch
+import types
 torch.cuda.get_device_capability = lambda *args, **kwargs: (8, 6)
+torch.cuda.get_device_properties = lambda *args, **kwargs: types.SimpleNamespace(name='NVIDIA A10G', major=8, minor=6, total_memory=23836033024, multi_processor_count=80)
+import huggingface_hub
 huggingface_hub.snapshot_download(
+    repo_id='camenduru/PASD',
     allow_patterns=[
+        'pasd/**',
+        'pasd_light/**',
+        'pasd_light_rrdb/**',
+        'pasd_rrdb/**',
     ],
+    local_dir='PASD/runs',
+    local_dir_use_symlinks=False,
 )
 huggingface_hub.hf_hub_download(
+    repo_id='camenduru/PASD',
+    filename='majicmixRealistic_v6.safetensors',
+    local_dir='PASD/checkpoints/personalized_models',
+    local_dir_use_symlinks=False,
 )
 huggingface_hub.hf_hub_download(
+    repo_id='akhaliq/RetinaFace-R50',
+    filename='RetinaFace-R50.pth',
+    local_dir='PASD/annotator/ckpts',
+    local_dir_use_symlinks=False,
 )
+import sys; sys.path.append('./PASD')
+import spaces
+import os
+import datetime
+import einops
+import gradio as gr
+from gradio_imageslider import ImageSlider
+import numpy as np
+import torch
+import random
+from PIL import Image
+from pathlib import Path
+from torchvision import transforms
+import torch.nn.functional as F
+from torchvision.models import resnet50, ResNet50_Weights
+from pytorch_lightning import seed_everything
+from transformers import CLIPTextModel, CLIPTokenizer, CLIPImageProcessor
+from diffusers import AutoencoderKL, DDIMScheduler, PNDMScheduler, DPMSolverMultistepScheduler, UniPCMultistepScheduler
 from pipelines.pipeline_pasd import StableDiffusionControlNetPipeline
+from myutils.misc import load_dreambooth_lora, rand_name
 from myutils.wavelet_color_fix import wavelet_color_fix
 from annotator.retinaface import RetinaFaceDetection
     from models.pasd.unet_2d_condition import UNet2DConditionModel
     from models.pasd.controlnet import ControlNetModel
+pretrained_model_path = "runwayml/stable-diffusion-v1-5"
 ckpt_path = "PASD/runs/pasd/checkpoint-100000"
+#dreambooth_lora_path = "checkpoints/personalized_models/toonyou_beta3.safetensors"
 dreambooth_lora_path = "PASD/checkpoints/personalized_models/majicmixRealistic_v6.safetensors"
+#dreambooth_lora_path = "checkpoints/personalized_models/Realistic_Vision_V5.1.safetensors"
 weight_dtype = torch.float16
 device = "cuda"
+scheduler = UniPCMultistepScheduler.from_pretrained(pretrained_model_path, subfolder="scheduler")
+text_encoder = CLIPTextModel.from_pretrained(pretrained_model_path, subfolder="text_encoder")
+tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_path, subfolder="tokenizer")
+vae = AutoencoderKL.from_pretrained(pretrained_model_path, subfolder="vae")
+feature_extractor = CLIPImageProcessor.from_pretrained(pretrained_model_path, subfolder="feature_extractor")
+unet = UNet2DConditionModel.from_pretrained(ckpt_path, subfolder="unet")
+controlnet = ControlNetModel.from_pretrained(ckpt_path, subfolder="controlnet")
 vae.requires_grad_(False)
 text_encoder.requires_grad_(False)
 unet.requires_grad_(False)
 controlnet.requires_grad_(False)
+unet, vae, text_encoder = load_dreambooth_lora(unet, vae, text_encoder, dreambooth_lora_path)
 text_encoder.to(device, dtype=weight_dtype)
 vae.to(device, dtype=weight_dtype)
 controlnet.to(device, dtype=weight_dtype)
 validation_pipeline = StableDiffusionControlNetPipeline(
+        vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, feature_extractor=feature_extractor,
+        unet=unet, controlnet=controlnet, scheduler=scheduler, safety_checker=None, requires_safety_checker=False,
+    )
+#validation_pipeline.enable_vae_tiling()
 validation_pipeline._init_tiled_vae(decoder_tile_size=224)
 weights = ResNet50_Weights.DEFAULT
 preprocess = weights.transforms()
 resnet = resnet50(weights=weights)
 resnet.eval()
+def resize_image(image_path, target_height):
+    # Open the image file
     with Image.open(image_path) as img:
+        # Calculate the ratio to resize the image to the target height
         ratio = target_height / float(img.size[1])
+        # Calculate the new width based on the aspect ratio
         new_width = int(float(img.size[0]) * ratio)
+        # Resize the image
+        resized_img = img.resize((new_width, target_height), Image.LANCZOS)
+        # Save the resized image
+        #resized_img.save(output_path)
+        return resized_img
 @spaces.GPU(enable_queue=True)
+def inference(input_image, prompt, a_prompt, n_prompt, denoise_steps, upscale, alpha, cfg, seed):
+    #tempo fix for seed equals-1
     if seed == -1:
         seed = 0
     input_image = resize_image(input_image, 512)
+    process_size = 768
+    resize_preproc = transforms.Compose([
+        transforms.Resize(process_size, interpolation=transforms.InterpolationMode.BILINEAR),
+    ])
+    # Get the current timestamp
     timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
     with torch.no_grad():
         seed_everything(seed)
         generator = torch.Generator(device=device)
+        input_image = input_image.convert('RGB')
         batch = preprocess(input_image).unsqueeze(0)
         prediction = resnet(batch).squeeze(0).softmax(0)
         class_id = prediction.argmax().item()
         score = prediction[class_id].item()
         category_name = weights.meta["categories"][class_id]
         if score >= 0.1:
+            prompt += f"{category_name}" if prompt=='' else f", {category_name}"
+        prompt = a_prompt if prompt=='' else f"{prompt}, {a_prompt}"
         ori_width, ori_height = input_image.size
+        resize_flag = False
+        rscale = upscale
+        input_image = input_image.resize((input_image.size[0]*rscale, input_image.size[1]*rscale))
+        #if min(validation_image.size) < process_size:
+        #    validation_image = resize_preproc(validation_image)
+        input_image = input_image.resize((input_image.size[0]//8*8, input_image.size[1]//8*8))
         width, height = input_image.size
+        resize_flag = True #
         try:
             image = validation_pipeline(
+                    None, prompt, input_image, num_inference_steps=denoise_steps, generator=generator, height=height, width=width, guidance_scale=cfg,
+                    negative_prompt=n_prompt, conditioning_scale=alpha, eta=0.0,
+                ).images[0]
+            if True: #alpha<1.0:
+                image = wavelet_color_fix(image, input_image)
+            if resize_flag:
+                image = image.resize((ori_width*rscale, ori_height*rscale))
         except Exception as e:
+            print(e)
             image = Image.new(mode="RGB", size=(512, 512))
+    # Convert and save the image as JPEG
+    image.save(f'result_{timestamp}.jpg', 'JPEG')
+    # Convert and save the image as JPEG
+    input_image.save(f'input_{timestamp}.jpg', 'JPEG')
+    return (f"input_{timestamp}.jpg", f"result_{timestamp}.jpg"), f"result_{timestamp}.jpg"
+title = "Pixel-Aware Stable Diffusion for Real-ISR"
+description = "Gradio Demo for PASD Real-ISR. To use it, simply upload your image, or click one of the examples to load them."
+article = "<a href='https://github.com/yangxy/PASD' target='_blank'>Github Repo Pytorch</a>"
+#examples=[['samples/27d38eeb2dbbe7c9.png'],['samples/629e4da70703193b.png']]
 css = """
 #col-container{
 }
 """
+with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.HTML(f"""
+        <h2 style="text-align: center;">
+            PASD Magnify
+        </h2>
+        <p style="text-align: center;">
+            Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization
+        </p>
+        <p id="project-links" align="center">
+            <a href='https://github.com/yangxy/PASD'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://huggingface.co/papers/2308.14469'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
+        </p>
+        <p style="margin:12px auto;display: flex;justify-content: center;">
+            <a href="https://huggingface.co/spaces/fffiloni/PASD?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
+        </p>
+        """)
         with gr.Row():
             with gr.Column():
+                input_image = gr.Image(type="filepath", sources=["upload"], value="PASD/samples/frog.png")
                 prompt_in = gr.Textbox(label="Prompt", value="Frog")
                 with gr.Accordion(label="Advanced settings", open=False):
+                    added_prompt = gr.Textbox(label="Added Prompt", value='clean, high-resolution, 8k, best quality, masterpiece')
+                    neg_prompt = gr.Textbox(label="Negative Prompt",value='dotted, noise, blur, lowres, oversmooth, longbody, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
+                    denoise_steps = gr.Slider(label="Denoise Steps", minimum=10, maximum=50, value=20, step=1)
+                    upsample_scale = gr.Slider(label="Upsample Scale", minimum=1, maximum=4, value=2, step=1)
+                    condition_scale = gr.Slider(label="Conditioning Scale", minimum=0.5, maximum=1.5, value=1.1, step=0.1)
+                    classifier_free_guidance = gr.Slider(label="Classier-free Guidance", minimum=0.1, maximum=10.0, value=7.5, step=0.1)
+                    seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
                 submit_btn = gr.Button("Submit")
             with gr.Column():
+                b_a_slider = ImageSlider(label="B/A result", position=0.5)
                 file_output = gr.File(label="Downloadable image result")
+    submit_btn.click(
+        fn = inference,
+        inputs = [
+            input_image, prompt_in,
+            added_prompt, neg_prompt,
+            denoise_steps,
+            upsample_scale, condition_scale,
+            classifier_free_guidance, seed
+        ],
+        outputs = [
+            b_a_slider,
+            file_output
+        ]
+    )
+demo.queue(max_size=10).launch(show_api=False)

requirements.txt CHANGED Viewed

@@ -1,20 +1,13 @@
-#gradio==6.12.0
-spaces>=0.48.1
-huggingface_hub==0.33.5
-diffusers==0.27.2
-transformers==4.35.2
-accelerate==0.24.1
-torch==2.8.0
-torchvision==0.23.0
-basicsr-fixed
 ultralytics
 salesforce-lavis
 webdataset
 pytorch_lightning
 spacy
-einops
-numpy
-pillow

+diffusers==0.28.2
+accelerate
+transformers==4.52.3
+xformers==0.0.29.post1
+basicsr
 ultralytics
 salesforce-lavis
 webdataset
 pytorch_lightning
+torch==2.5.1
+torchvision==0.20.1
 spacy
+gradio_imageslider