HAL1993 committed on
Commit
a673b7c
·
verified ·
1 Parent(s): 5ba8d58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -107
app.py CHANGED
@@ -4,7 +4,8 @@ import gradio as gr
4
  import numpy as np
5
  import torch
6
  import safetensors.torch as sf
7
- import requests
 
8
  from PIL import Image
9
  from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
10
  from diffusers import AutoencoderKL, UNet2DConditionModel, DDIMScheduler, EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler
@@ -12,7 +13,11 @@ from diffusers.models.attention_processor import AttnProcessor2_0
12
  from transformers import CLIPTextModel, CLIPTokenizer
13
  from briarmbg import BriaRMBG
14
  from enum import Enum
 
 
15
 
 
 
16
  sd15_name = 'stablediffusionapi/realistic-vision-v51'
17
  tokenizer = CLIPTokenizer.from_pretrained(sd15_name, subfolder="tokenizer")
18
  text_encoder = CLIPTextModel.from_pretrained(sd15_name, subfolder="text_encoder")
@@ -20,6 +25,8 @@ vae = AutoencoderKL.from_pretrained(sd15_name, subfolder="vae")
20
  unet = UNet2DConditionModel.from_pretrained(sd15_name, subfolder="unet")
21
  rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
22
 
 
 
23
  with torch.no_grad():
24
  new_conv_in = torch.nn.Conv2d(8, unet.conv_in.out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding)
25
  new_conv_in.weight.zero_()
@@ -29,6 +36,7 @@ with torch.no_grad():
29
 
30
  unet_original_forward = unet.forward
31
 
 
32
  def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs):
33
  c_concat = kwargs['cross_attention_kwargs']['concat_conds'].to(sample)
34
  c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0)
@@ -36,9 +44,13 @@ def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs):
36
  kwargs['cross_attention_kwargs'] = {}
37
  return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs)
38
 
 
39
  unet.forward = hooked_unet_forward
40
 
 
 
41
  model_path = './models/iclight_sd15_fc.safetensors'
 
42
  sd_offset = sf.load_file(model_path)
43
  sd_origin = unet.state_dict()
44
  keys = sd_origin.keys()
@@ -46,15 +58,21 @@ sd_merged = {k: sd_origin[k] + sd_offset[k] for k in sd_origin.keys()}
46
  unet.load_state_dict(sd_merged, strict=True)
47
  del sd_offset, sd_origin, sd_merged, keys
48
 
 
 
49
  device = torch.device('cuda')
50
  text_encoder = text_encoder.to(device=device, dtype=torch.float16)
51
  vae = vae.to(device=device, dtype=torch.bfloat16)
52
  unet = unet.to(device=device, dtype=torch.float16)
53
  rmbg = rmbg.to(device=device, dtype=torch.float32)
54
 
 
 
55
  unet.set_attn_processor(AttnProcessor2_0())
56
  vae.set_attn_processor(AttnProcessor2_0())
57
 
 
 
58
  ddim_scheduler = DDIMScheduler(
59
  num_train_timesteps=1000,
60
  beta_start=0.00085,
@@ -81,6 +99,8 @@ dpmpp_2m_sde_karras_scheduler = DPMSolverMultistepScheduler(
81
  steps_offset=1
82
  )
83
 
 
 
84
  t2i_pipe = StableDiffusionPipeline(
85
  vae=vae,
86
  text_encoder=text_encoder,
@@ -105,6 +125,7 @@ i2i_pipe = StableDiffusionImg2ImgPipeline(
105
  image_encoder=None
106
  )
107
 
 
108
  @torch.inference_mode()
109
  def encode_prompt_inner(txt: str):
110
  max_length = tokenizer.model_max_length
@@ -125,6 +146,7 @@ def encode_prompt_inner(txt: str):
125
 
126
  return conds
127
 
 
128
  @torch.inference_mode()
129
  def encode_prompt_pair(positive_prompt, negative_prompt):
130
  c = encode_prompt_inner(positive_prompt)
@@ -145,6 +167,7 @@ def encode_prompt_pair(positive_prompt, negative_prompt):
145
 
146
  return c, uc
147
 
 
148
  @torch.inference_mode()
149
  def pytorch2numpy(imgs, quant=True):
150
  results = []
@@ -161,12 +184,14 @@ def pytorch2numpy(imgs, quant=True):
161
  results.append(y)
162
  return results
163
 
 
164
  @torch.inference_mode()
165
  def numpy2pytorch(imgs):
166
- h = torch.from_numpy(np.stack(imgs, axis=0)).float() / 127.0 - 1.0
167
  h = h.movedim(-1, 1)
168
  return h
169
 
 
170
  def resize_and_center_crop(image, target_width, target_height):
171
  pil_image = Image.fromarray(image)
172
  original_width, original_height = pil_image.size
@@ -181,11 +206,13 @@ def resize_and_center_crop(image, target_width, target_height):
181
  cropped_image = resized_image.crop((left, top, right, bottom))
182
  return np.array(cropped_image)
183
 
 
184
  def resize_without_crop(image, target_width, target_height):
185
  pil_image = Image.fromarray(image)
186
  resized_image = pil_image.resize((target_width, target_height), Image.LANCZOS)
187
  return np.array(resized_image)
188
 
 
189
  @torch.inference_mode()
190
  def run_rmbg(img, sigma=0.0):
191
  H, W, C = img.shape
@@ -200,42 +227,9 @@ def run_rmbg(img, sigma=0.0):
200
  result = 127 + (img.astype(np.float32) - 127 + sigma) * alpha
201
  return result.clip(0, 255).astype(np.uint8), alpha
202
 
203
- @spaces.GPU
204
- def translate_albanian_to_english(text):
205
- """Translate Albanian to English using sepioo-facebook-translation API."""
206
- if not text.strip():
207
- return ""
208
- for attempt in range(2):
209
- try:
210
- response = requests.post(
211
- "https://hal1993-mdftranslation1234567890abcdef1234567890-fc073a6.hf.space/v1/translate",
212
- json={"from_language": "sq", "to_language": "en", "input_text": text},
213
- headers={"accept": "application/json", "Content-Type": "application/json"},
214
- timeout=5
215
- )
216
- response.raise_for_status()
217
- translated = response.json().get("translate", "")
218
- print(f"Translation response: {translated}")
219
- return translated
220
- except Exception as e:
221
- print(f"Translation error (attempt {attempt + 1}): {e}")
222
- if attempt == 1:
223
- return f"PΓ«rkthimi dΓ«shtoi: {str(e)}"
224
- return f"PΓ«rkthimi dΓ«shtoi"
225
 
226
- @spaces.GPU
227
  @torch.inference_mode()
228
- def process(input_fg, prompt_albanian, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source):
229
- if not input_fg.any():
230
- return None, None
231
-
232
- if not prompt_albanian.strip():
233
- prompt = ""
234
- else:
235
- prompt = translate_albanian_to_english(prompt_albanian)
236
- if prompt.startswith("PΓ«rkthimi dΓ«shtoi"):
237
- return None, None
238
-
239
  bg_source = BGSource(bg_source)
240
  input_bg = None
241
 
@@ -338,96 +332,102 @@ def process(input_fg, prompt_albanian, image_width, image_height, num_samples, s
338
 
339
  return pytorch2numpy(pixels)
340
 
 
341
  @spaces.GPU
342
  @torch.inference_mode()
343
- def process_relight(input_fg, prompt_albanian, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source):
344
- if not input_fg.any():
345
- return None, None
346
-
347
  input_fg, matting = run_rmbg(input_fg)
348
- results = process(input_fg, prompt_albanian, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source)
349
  return input_fg, results
350
 
351
- def update_resolution(aspect_ratio):
352
- if aspect_ratio == "9:16":
353
- return 512, 910
354
- elif aspect_ratio == "1:1":
355
- return 640, 640
356
- elif aspect_ratio == "16:9":
357
- return 910, 512
358
- return 512, 910 # Default to 9:16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
  class BGSource(Enum):
361
- NONE = "AsnjΓ«"
362
- LEFT = "DritΓ« nga Majtas"
363
- RIGHT = "DritΓ« nga Djathtas"
364
- TOP = "DritΓ« nga SipΓ«r"
365
- BOTTOM = "DritΓ« nga PoshtΓ«"
366
-
367
- css = """
368
- body::before {
369
- content: "";
370
- display: block;
371
- height: 320px;
372
- background-color: var(--body-background-fill);
373
- }
374
- button[aria-label="Fullscreen"], button[aria-label="Fullscreen"]:hover {
375
- display: none !important;
376
- visibility: hidden !important;
377
- opacity: 0 !important;
378
- pointer-events: none !important;
379
- }
380
- button[aria-label="Share"], button[aria-label="Share"]:hover {
381
- display: none !important;
382
- }
383
- button[aria-label="Download"] {
384
- transform: scale(3);
385
- transform-origin: top right;
386
- margin: 0 !important;
387
- padding: 6px !important;
388
- }
389
- """
390
-
391
- block = gr.Blocks(css=css).queue()
392
  with block:
 
 
 
 
393
  with gr.Row():
394
  with gr.Column():
395
  with gr.Row():
396
- input_fg = gr.Image(sources='upload', type="numpy", label="Imazhi i Hyrjes", height=480)
397
- output_bg = gr.Image(type="numpy", label="Sfondi i PΓ«rpunuar", height=480)
398
- prompt_albanian = gr.Textbox(label="PΓ«rshkrimi")
399
  bg_source = gr.Radio(choices=[e.value for e in BGSource],
400
  value=BGSource.NONE.value,
401
- label="Preferenca e NdriΓ§imit (Latenti Fillestar)", type='value')
402
- aspect_ratio = gr.Radio(choices=["9:16", "1:1", "16:9"], value="9:16", label="Raporti i Imazhit")
403
- relight_button = gr.Button(value="Gjenero")
 
404
 
405
  with gr.Group():
406
  with gr.Row():
407
- num_samples = gr.Slider(label="Numri i Imazheve", minimum=1, maximum=12, value=1, step=1, visible=False)
408
- seed = gr.Number(label="FarΓ«", value=-1, precision=0, visible=False)
409
- image_width = gr.State(value=512)
410
- image_height = gr.State(value=910)
411
-
412
- with gr.Accordion("Opsionet e Avancuara", open=False, visible=False):
413
- steps = gr.Slider(label="Hapat", minimum=1, maximum=100, value=50, step=1)
414
- cfg = gr.Slider(label="Shkalla CFG", minimum=1.0, maximum=32.0, value=2, step=0.01)
415
- lowres_denoise = gr.Slider(label="Denoise pΓ«r RezolutΓ« tΓ« UlΓ«t (pΓ«r latent fillestar)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
416
- highres_scale = gr.Slider(label="Shkalla e RezolutΓ«s sΓ« LartΓ«", minimum=1.0, maximum=3.0, value=2, step=0.01)
417
- highres_denoise = gr.Slider(label="Denoise pΓ«r RezolutΓ« tΓ« LartΓ«", minimum=0.1, maximum=1.0, value=1, step=0.01)
 
 
418
  a_prompt = gr.Textbox(label="Added Prompt", value='best quality')
419
  n_prompt = gr.Textbox(label="Negative Prompt", value='lowres, bad anatomy, bad hands, cropped, worst quality')
420
  with gr.Column():
421
- result_gallery = gr.Gallery(height=832, object_fit='contain', label='Rezultatet')
422
  with gr.Row():
423
- dummy_image_for_outputs = gr.Image(visible=False, label='Rezultati')
424
- ips = [input_fg, prompt_albanian, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source]
425
- aspect_ratio.change(
426
- fn=update_resolution,
427
- inputs=[aspect_ratio],
428
- outputs=[image_width, image_height],
429
- queue=False
430
- )
 
 
 
431
  relight_button.click(fn=process_relight, inputs=ips, outputs=[output_bg, result_gallery])
 
 
 
432
 
433
- block.launch(server_name='0.0.0.0')
 
4
  import numpy as np
5
  import torch
6
  import safetensors.torch as sf
7
+ import db_examples
8
+
9
  from PIL import Image
10
  from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
11
  from diffusers import AutoencoderKL, UNet2DConditionModel, DDIMScheduler, EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler
 
13
  from transformers import CLIPTextModel, CLIPTokenizer
14
  from briarmbg import BriaRMBG
15
  from enum import Enum
16
+ # from torch.hub import download_url_to_file
17
+
18
 
19
+ # 'stablediffusionapi/realistic-vision-v51'
20
+ # 'runwayml/stable-diffusion-v1-5'
21
  sd15_name = 'stablediffusionapi/realistic-vision-v51'
22
  tokenizer = CLIPTokenizer.from_pretrained(sd15_name, subfolder="tokenizer")
23
  text_encoder = CLIPTextModel.from_pretrained(sd15_name, subfolder="text_encoder")
 
25
  unet = UNet2DConditionModel.from_pretrained(sd15_name, subfolder="unet")
26
  rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
27
 
28
+ # Change UNet
29
+
30
  with torch.no_grad():
31
  new_conv_in = torch.nn.Conv2d(8, unet.conv_in.out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding)
32
  new_conv_in.weight.zero_()
 
36
 
37
  unet_original_forward = unet.forward
38
 
39
+
40
  def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs):
41
  c_concat = kwargs['cross_attention_kwargs']['concat_conds'].to(sample)
42
  c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0)
 
44
  kwargs['cross_attention_kwargs'] = {}
45
  return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs)
46
 
47
+
48
  unet.forward = hooked_unet_forward
49
 
50
+ # Load
51
+
52
  model_path = './models/iclight_sd15_fc.safetensors'
53
+ # download_url_to_file(url='https://huggingface.co/lllyasviel/ic-light/resolve/main/iclight_sd15_fc.safetensors', dst=model_path)
54
  sd_offset = sf.load_file(model_path)
55
  sd_origin = unet.state_dict()
56
  keys = sd_origin.keys()
 
58
  unet.load_state_dict(sd_merged, strict=True)
59
  del sd_offset, sd_origin, sd_merged, keys
60
 
61
+ # Device
62
+
63
  device = torch.device('cuda')
64
  text_encoder = text_encoder.to(device=device, dtype=torch.float16)
65
  vae = vae.to(device=device, dtype=torch.bfloat16)
66
  unet = unet.to(device=device, dtype=torch.float16)
67
  rmbg = rmbg.to(device=device, dtype=torch.float32)
68
 
69
+ # SDP
70
+
71
  unet.set_attn_processor(AttnProcessor2_0())
72
  vae.set_attn_processor(AttnProcessor2_0())
73
 
74
+ # Samplers
75
+
76
  ddim_scheduler = DDIMScheduler(
77
  num_train_timesteps=1000,
78
  beta_start=0.00085,
 
99
  steps_offset=1
100
  )
101
 
102
+ # Pipelines
103
+
104
  t2i_pipe = StableDiffusionPipeline(
105
  vae=vae,
106
  text_encoder=text_encoder,
 
125
  image_encoder=None
126
  )
127
 
128
+
129
  @torch.inference_mode()
130
  def encode_prompt_inner(txt: str):
131
  max_length = tokenizer.model_max_length
 
146
 
147
  return conds
148
 
149
+
150
  @torch.inference_mode()
151
  def encode_prompt_pair(positive_prompt, negative_prompt):
152
  c = encode_prompt_inner(positive_prompt)
 
167
 
168
  return c, uc
169
 
170
+
171
  @torch.inference_mode()
172
  def pytorch2numpy(imgs, quant=True):
173
  results = []
 
184
  results.append(y)
185
  return results
186
 
187
+
188
  @torch.inference_mode()
189
  def numpy2pytorch(imgs):
190
+ h = torch.from_numpy(np.stack(imgs, axis=0)).float() / 127.0 - 1.0 # so that 127 must be strictly 0.0
191
  h = h.movedim(-1, 1)
192
  return h
193
 
194
+
195
  def resize_and_center_crop(image, target_width, target_height):
196
  pil_image = Image.fromarray(image)
197
  original_width, original_height = pil_image.size
 
206
  cropped_image = resized_image.crop((left, top, right, bottom))
207
  return np.array(cropped_image)
208
 
209
+
210
  def resize_without_crop(image, target_width, target_height):
211
  pil_image = Image.fromarray(image)
212
  resized_image = pil_image.resize((target_width, target_height), Image.LANCZOS)
213
  return np.array(resized_image)
214
 
215
+
216
  @torch.inference_mode()
217
  def run_rmbg(img, sigma=0.0):
218
  H, W, C = img.shape
 
227
  result = 127 + (img.astype(np.float32) - 127 + sigma) * alpha
228
  return result.clip(0, 255).astype(np.uint8), alpha
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
 
231
  @torch.inference_mode()
232
+ def process(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source):
 
 
 
 
 
 
 
 
 
 
233
  bg_source = BGSource(bg_source)
234
  input_bg = None
235
 
 
332
 
333
  return pytorch2numpy(pixels)
334
 
335
+
336
  @spaces.GPU
337
  @torch.inference_mode()
338
+ def process_relight(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source):
 
 
 
339
  input_fg, matting = run_rmbg(input_fg)
340
+ results = process(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source)
341
  return input_fg, results
342
 
343
+
344
+ quick_prompts = [
345
+ 'sunshine from window',
346
+ 'neon light, city',
347
+ 'sunset over sea',
348
+ 'golden time',
349
+ 'sci-fi RGB glowing, cyberpunk',
350
+ 'natural lighting',
351
+ 'warm atmosphere, at home, bedroom',
352
+ 'magic lit',
353
+ 'evil, gothic, Yharnam',
354
+ 'light and shadow',
355
+ 'shadow from window',
356
+ 'soft studio lighting',
357
+ 'home atmosphere, cozy bedroom illumination',
358
+ 'neon, Wong Kar-wai, warm'
359
+ ]
360
+ quick_prompts = [[x] for x in quick_prompts]
361
+
362
+
363
+ quick_subjects = [
364
+ 'beautiful woman, detailed face',
365
+ 'handsome man, detailed face',
366
+ ]
367
+ quick_subjects = [[x] for x in quick_subjects]
368
+
369
 
370
  class BGSource(Enum):
371
+ NONE = "None"
372
+ LEFT = "Left Light"
373
+ RIGHT = "Right Light"
374
+ TOP = "Top Light"
375
+ BOTTOM = "Bottom Light"
376
+
377
+
378
+ block = gr.Blocks().queue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  with block:
380
+ with gr.Row():
381
+ gr.Markdown("## IC-Light (Relighting with Foreground Condition)")
382
+ with gr.Row():
383
+ gr.Markdown("See also https://github.com/lllyasviel/IC-Light for background-conditioned model and normal estimation")
384
  with gr.Row():
385
  with gr.Column():
386
  with gr.Row():
387
+ input_fg = gr.Image(sources='upload', type="numpy", label="Image", height=480)
388
+ output_bg = gr.Image(type="numpy", label="Preprocessed Foreground", height=480)
389
+ prompt = gr.Textbox(label="Prompt")
390
  bg_source = gr.Radio(choices=[e.value for e in BGSource],
391
  value=BGSource.NONE.value,
392
+ label="Lighting Preference (Initial Latent)", type='value')
393
+ example_quick_subjects = gr.Dataset(samples=quick_subjects, label='Subject Quick List', samples_per_page=1000, components=[prompt])
394
+ example_quick_prompts = gr.Dataset(samples=quick_prompts, label='Lighting Quick List', samples_per_page=1000, components=[prompt])
395
+ relight_button = gr.Button(value="Relight")
396
 
397
  with gr.Group():
398
  with gr.Row():
399
+ num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
400
+ seed = gr.Number(label="Seed", value=12345, precision=0)
401
+
402
+ with gr.Row():
403
+ image_width = gr.Slider(label="Image Width", minimum=256, maximum=1024, value=512, step=64)
404
+ image_height = gr.Slider(label="Image Height", minimum=256, maximum=1024, value=640, step=64)
405
+
406
+ with gr.Accordion("Advanced options", open=False):
407
+ steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
408
+ cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=2, step=0.01)
409
+ lowres_denoise = gr.Slider(label="Lowres Denoise (for initial latent)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
410
+ highres_scale = gr.Slider(label="Highres Scale", minimum=1.0, maximum=3.0, value=1.5, step=0.01)
411
+ highres_denoise = gr.Slider(label="Highres Denoise", minimum=0.1, maximum=1.0, value=0.5, step=0.01)
412
  a_prompt = gr.Textbox(label="Added Prompt", value='best quality')
413
  n_prompt = gr.Textbox(label="Negative Prompt", value='lowres, bad anatomy, bad hands, cropped, worst quality')
414
  with gr.Column():
415
+ result_gallery = gr.Gallery(height=832, object_fit='contain', label='Outputs')
416
  with gr.Row():
417
+ dummy_image_for_outputs = gr.Image(visible=False, label='Result')
418
+ gr.Examples(
419
+ fn=lambda *args: [[args[-1]], "imgs/dummy.png"],
420
+ examples=db_examples.foreground_conditioned_examples,
421
+ inputs=[
422
+ input_fg, prompt, bg_source, image_width, image_height, seed, dummy_image_for_outputs
423
+ ],
424
+ outputs=[result_gallery, output_bg],
425
+ run_on_click=True, examples_per_page=1024
426
+ )
427
+ ips = [input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source]
428
  relight_button.click(fn=process_relight, inputs=ips, outputs=[output_bg, result_gallery])
429
+ example_quick_prompts.click(lambda x, y: ', '.join(y.split(', ')[:2] + [x[0]]), inputs=[example_quick_prompts, prompt], outputs=prompt, show_progress=False, queue=False)
430
+ example_quick_subjects.click(lambda x: x[0], inputs=example_quick_subjects, outputs=prompt, show_progress=False, queue=False)
431
+
432
 
433
+ block.launch(server_name='0.0.0.0')