HAL1993 committed on
Commit
522960f
·
verified ·
1 Parent(s): a673b7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -121
app.py CHANGED
@@ -13,11 +13,9 @@ from diffusers.models.attention_processor import AttnProcessor2_0
13
  from transformers import CLIPTextModel, CLIPTokenizer
14
  from briarmbg import BriaRMBG
15
  from enum import Enum
16
- # from torch.hub import download_url_to_file
17
 
18
-
19
- # 'stablediffusionapi/realistic-vision-v51'
20
- # 'runwayml/stable-diffusion-v1-5'
21
  sd15_name = 'stablediffusionapi/realistic-vision-v51'
22
  tokenizer = CLIPTokenizer.from_pretrained(sd15_name, subfolder="tokenizer")
23
  text_encoder = CLIPTextModel.from_pretrained(sd15_name, subfolder="text_encoder")
@@ -25,8 +23,7 @@ vae = AutoencoderKL.from_pretrained(sd15_name, subfolder="vae")
25
  unet = UNet2DConditionModel.from_pretrained(sd15_name, subfolder="unet")
26
  rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
27
 
28
- # Change UNet
29
-
30
  with torch.no_grad():
31
  new_conv_in = torch.nn.Conv2d(8, unet.conv_in.out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding)
32
  new_conv_in.weight.zero_()
@@ -36,7 +33,6 @@ with torch.no_grad():
36
 
37
  unet_original_forward = unet.forward
38
 
39
-
40
  def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs):
41
  c_concat = kwargs['cross_attention_kwargs']['concat_conds'].to(sample)
42
  c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0)
@@ -44,13 +40,10 @@ def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs):
44
  kwargs['cross_attention_kwargs'] = {}
45
  return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs)
46
 
47
-
48
  unet.forward = hooked_unet_forward
49
 
50
- # Load
51
-
52
  model_path = './models/iclight_sd15_fc.safetensors'
53
- # download_url_to_file(url='https://huggingface.co/lllyasviel/ic-light/resolve/main/iclight_sd15_fc.safetensors', dst=model_path)
54
  sd_offset = sf.load_file(model_path)
55
  sd_origin = unet.state_dict()
56
  keys = sd_origin.keys()
@@ -58,21 +51,18 @@ sd_merged = {k: sd_origin[k] + sd_offset[k] for k in sd_origin.keys()}
58
  unet.load_state_dict(sd_merged, strict=True)
59
  del sd_offset, sd_origin, sd_merged, keys
60
 
61
- # Device
62
-
63
  device = torch.device('cuda')
64
  text_encoder = text_encoder.to(device=device, dtype=torch.float16)
65
  vae = vae.to(device=device, dtype=torch.bfloat16)
66
  unet = unet.to(device=device, dtype=torch.float16)
67
  rmbg = rmbg.to(device=device, dtype=torch.float32)
68
 
69
- # SDP
70
-
71
  unet.set_attn_processor(AttnProcessor2_0())
72
  vae.set_attn_processor(AttnProcessor2_0())
73
 
74
- # Samplers
75
-
76
  ddim_scheduler = DDIMScheduler(
77
  num_train_timesteps=1000,
78
  beta_start=0.00085,
@@ -99,8 +89,7 @@ dpmpp_2m_sde_karras_scheduler = DPMSolverMultistepScheduler(
99
  steps_offset=1
100
  )
101
 
102
- # Pipelines
103
-
104
  t2i_pipe = StableDiffusionPipeline(
105
  vae=vae,
106
  text_encoder=text_encoder,
@@ -125,7 +114,28 @@ i2i_pipe = StableDiffusionImg2ImgPipeline(
125
  image_encoder=None
126
  )
127
 
128
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  @torch.inference_mode()
130
  def encode_prompt_inner(txt: str):
131
  max_length = tokenizer.model_max_length
@@ -146,7 +156,6 @@ def encode_prompt_inner(txt: str):
146
 
147
  return conds
148
 
149
-
150
  @torch.inference_mode()
151
  def encode_prompt_pair(positive_prompt, negative_prompt):
152
  c = encode_prompt_inner(positive_prompt)
@@ -167,31 +176,26 @@ def encode_prompt_pair(positive_prompt, negative_prompt):
167
 
168
  return c, uc
169
 
170
-
171
  @torch.inference_mode()
172
  def pytorch2numpy(imgs, quant=True):
173
  results = []
174
  for x in imgs:
175
  y = x.movedim(0, -1)
176
-
177
  if quant:
178
  y = y * 127.5 + 127.5
179
  y = y.detach().float().cpu().numpy().clip(0, 255).astype(np.uint8)
180
  else:
181
  y = y * 0.5 + 0.5
182
  y = y.detach().float().cpu().numpy().clip(0, 1).astype(np.float32)
183
-
184
  results.append(y)
185
  return results
186
 
187
-
188
  @torch.inference_mode()
189
  def numpy2pytorch(imgs):
190
- h = torch.from_numpy(np.stack(imgs, axis=0)).float() / 127.0 - 1.0 # so that 127 must be strictly 0.0
191
  h = h.movedim(-1, 1)
192
  return h
193
 
194
-
195
  def resize_and_center_crop(image, target_width, target_height):
196
  pil_image = Image.fromarray(image)
197
  original_width, original_height = pil_image.size
@@ -206,13 +210,11 @@ def resize_and_center_crop(image, target_width, target_height):
206
  cropped_image = resized_image.crop((left, top, right, bottom))
207
  return np.array(cropped_image)
208
 
209
-
210
  def resize_without_crop(image, target_width, target_height):
211
  pil_image = Image.fromarray(image)
212
  resized_image = pil_image.resize((target_width, target_height), Image.LANCZOS)
213
  return np.array(resized_image)
214
 
215
-
216
  @torch.inference_mode()
217
  def run_rmbg(img, sigma=0.0):
218
  H, W, C = img.shape
@@ -227,7 +229,6 @@ def run_rmbg(img, sigma=0.0):
227
  result = 127 + (img.astype(np.float32) - 127 + sigma) * alpha
228
  return result.clip(0, 255).astype(np.uint8), alpha
229
 
230
-
231
  @torch.inference_mode()
232
  def process(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source):
233
  bg_source = BGSource(bg_source)
@@ -332,102 +333,71 @@ def process(input_fg, prompt, image_width, image_height, num_samples, seed, step
332
 
333
  return pytorch2numpy(pixels)
334
 
335
-
336
  @spaces.GPU
337
  @torch.inference_mode()
338
  def process_relight(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source):
 
 
 
 
 
339
  input_fg, matting = run_rmbg(input_fg)
340
- results = process(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source)
341
  return input_fg, results
342
 
343
-
344
- quick_prompts = [
345
- 'sunshine from window',
346
- 'neon light, city',
347
- 'sunset over sea',
348
- 'golden time',
349
- 'sci-fi RGB glowing, cyberpunk',
350
- 'natural lighting',
351
- 'warm atmosphere, at home, bedroom',
352
- 'magic lit',
353
- 'evil, gothic, Yharnam',
354
- 'light and shadow',
355
- 'shadow from window',
356
- 'soft studio lighting',
357
- 'home atmosphere, cozy bedroom illumination',
358
- 'neon, Wong Kar-wai, warm'
359
- ]
360
- quick_prompts = [[x] for x in quick_prompts]
361
-
362
-
363
- quick_subjects = [
364
- 'beautiful woman, detailed face',
365
- 'handsome man, detailed face',
366
- ]
367
- quick_subjects = [[x] for x in quick_subjects]
368
-
369
-
370
  class BGSource(Enum):
371
- NONE = "None"
372
- LEFT = "Left Light"
373
- RIGHT = "Right Light"
374
- TOP = "Top Light"
375
- BOTTOM = "Bottom Light"
376
-
377
-
378
- block = gr.Blocks().queue()
379
- with block:
380
- with gr.Row():
381
- gr.Markdown("## IC-Light (Relighting with Foreground Condition)")
382
- with gr.Row():
383
- gr.Markdown("See also https://github.com/lllyasviel/IC-Light for background-conditioned model and normal estimation")
384
- with gr.Row():
385
- with gr.Column():
386
- with gr.Row():
387
- input_fg = gr.Image(sources='upload', type="numpy", label="Image", height=480)
388
- output_bg = gr.Image(type="numpy", label="Preprocessed Foreground", height=480)
389
- prompt = gr.Textbox(label="Prompt")
390
- bg_source = gr.Radio(choices=[e.value for e in BGSource],
391
- value=BGSource.NONE.value,
392
- label="Lighting Preference (Initial Latent)", type='value')
393
- example_quick_subjects = gr.Dataset(samples=quick_subjects, label='Subject Quick List', samples_per_page=1000, components=[prompt])
394
- example_quick_prompts = gr.Dataset(samples=quick_prompts, label='Lighting Quick List', samples_per_page=1000, components=[prompt])
395
- relight_button = gr.Button(value="Relight")
396
-
397
- with gr.Group():
398
- with gr.Row():
399
- num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
400
- seed = gr.Number(label="Seed", value=12345, precision=0)
401
-
402
- with gr.Row():
403
- image_width = gr.Slider(label="Image Width", minimum=256, maximum=1024, value=512, step=64)
404
- image_height = gr.Slider(label="Image Height", minimum=256, maximum=1024, value=640, step=64)
405
-
406
- with gr.Accordion("Advanced options", open=False):
407
- steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
408
- cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=2, step=0.01)
409
- lowres_denoise = gr.Slider(label="Lowres Denoise (for initial latent)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
410
- highres_scale = gr.Slider(label="Highres Scale", minimum=1.0, maximum=3.0, value=1.5, step=0.01)
411
- highres_denoise = gr.Slider(label="Highres Denoise", minimum=0.1, maximum=1.0, value=0.5, step=0.01)
412
- a_prompt = gr.Textbox(label="Added Prompt", value='best quality')
413
- n_prompt = gr.Textbox(label="Negative Prompt", value='lowres, bad anatomy, bad hands, cropped, worst quality')
414
  with gr.Column():
415
- result_gallery = gr.Gallery(height=832, object_fit='contain', label='Outputs')
416
- with gr.Row():
417
- dummy_image_for_outputs = gr.Image(visible=False, label='Result')
418
- gr.Examples(
419
- fn=lambda *args: [[args[-1]], "imgs/dummy.png"],
420
- examples=db_examples.foreground_conditioned_examples,
421
- inputs=[
422
- input_fg, prompt, bg_source, image_width, image_height, seed, dummy_image_for_outputs
423
- ],
424
- outputs=[result_gallery, output_bg],
425
- run_on_click=True, examples_per_page=1024
426
- )
427
- ips = [input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source]
428
- relight_button.click(fn=process_relight, inputs=ips, outputs=[output_bg, result_gallery])
429
- example_quick_prompts.click(lambda x, y: ', '.join(y.split(', ')[:2] + [x[0]]), inputs=[example_quick_prompts, prompt], outputs=prompt, show_progress=False, queue=False)
430
- example_quick_subjects.click(lambda x: x[0], inputs=example_quick_subjects, outputs=prompt, show_progress=False, queue=False)
431
-
432
-
433
- block.launch(server_name='0.0.0.0')
 
13
  from transformers import CLIPTextModel, CLIPTokenizer
14
  from briarmbg import BriaRMBG
15
  from enum import Enum
16
+ import requests
17
 
18
+ # Model setup (unchanged)
 
 
19
  sd15_name = 'stablediffusionapi/realistic-vision-v51'
20
  tokenizer = CLIPTokenizer.from_pretrained(sd15_name, subfolder="tokenizer")
21
  text_encoder = CLIPTextModel.from_pretrained(sd15_name, subfolder="text_encoder")
 
23
  unet = UNet2DConditionModel.from_pretrained(sd15_name, subfolder="unet")
24
  rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
25
 
26
+ # Change UNet (unchanged)
 
27
  with torch.no_grad():
28
  new_conv_in = torch.nn.Conv2d(8, unet.conv_in.out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding)
29
  new_conv_in.weight.zero_()
 
33
 
34
  unet_original_forward = unet.forward
35
 
 
36
  def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs):
37
  c_concat = kwargs['cross_attention_kwargs']['concat_conds'].to(sample)
38
  c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0)
 
40
  kwargs['cross_attention_kwargs'] = {}
41
  return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs)
42
 
 
43
  unet.forward = hooked_unet_forward
44
 
45
+ # Load model (unchanged)
 
46
  model_path = './models/iclight_sd15_fc.safetensors'
 
47
  sd_offset = sf.load_file(model_path)
48
  sd_origin = unet.state_dict()
49
  keys = sd_origin.keys()
 
51
  unet.load_state_dict(sd_merged, strict=True)
52
  del sd_offset, sd_origin, sd_merged, keys
53
 
54
+ # Device setup (unchanged)
 
55
  device = torch.device('cuda')
56
  text_encoder = text_encoder.to(device=device, dtype=torch.float16)
57
  vae = vae.to(device=device, dtype=torch.bfloat16)
58
  unet = unet.to(device=device, dtype=torch.float16)
59
  rmbg = rmbg.to(device=device, dtype=torch.float32)
60
 
61
+ # SDP (unchanged)
 
62
  unet.set_attn_processor(AttnProcessor2_0())
63
  vae.set_attn_processor(AttnProcessor2_0())
64
 
65
+ # Samplers (unchanged)
 
66
  ddim_scheduler = DDIMScheduler(
67
  num_train_timesteps=1000,
68
  beta_start=0.00085,
 
89
  steps_offset=1
90
  )
91
 
92
+ # Pipelines (unchanged)
 
93
  t2i_pipe = StableDiffusionPipeline(
94
  vae=vae,
95
  text_encoder=text_encoder,
 
114
  image_encoder=None
115
  )
116
 
117
+ # Translation function (adapted from example)
118
@spaces.GPU
def translate_albanian_to_english(text):
    """Translate Albanian *text* to English via the remote MDF translation service.

    Returns "" for blank input. The request is attempted twice; if both
    attempts fail, returns a message starting with "Përkthimi dështoi"
    (callers detect failure by checking that prefix).

    NOTE(review): this is a pure network call; the @spaces.GPU decorator is
    kept for interface compatibility but presumably unnecessary — confirm.
    """
    if not text.strip():
        return ""
    last_error = None
    for attempt in range(2):
        try:
            response = requests.post(
                "https://hal1993-mdftranslation1234567890abcdef1234567890-fc073a6.hf.space/v1/translate",
                json={"from_language": "sq", "to_language": "en", "input_text": text},
                headers={"accept": "application/json", "Content-Type": "application/json"},
                timeout=5,
            )
            response.raise_for_status()
            return response.json().get("translate", "")
        except Exception as e:  # network / HTTP / JSON decoding errors
            # BUG FIX: the original returned "Përkthimi dështoi" immediately on
            # the first failure, so the second attempt in range(2) never ran.
            # Remember the error and let the loop retry instead.
            last_error = e
    return f"Përkthimi dështoi: {last_error}"
137
+
138
+ # Core processing functions (unchanged)
139
  @torch.inference_mode()
140
  def encode_prompt_inner(txt: str):
141
  max_length = tokenizer.model_max_length
 
156
 
157
  return conds
158
 
 
159
  @torch.inference_mode()
160
  def encode_prompt_pair(positive_prompt, negative_prompt):
161
  c = encode_prompt_inner(positive_prompt)
 
176
 
177
  return c, uc
178
 
 
179
@torch.inference_mode()
def pytorch2numpy(imgs, quant=True):
    """Convert CHW torch tensors in [-1, 1] to HWC numpy images.

    With quant=True the output is uint8 in [0, 255]; otherwise float32
    in [0, 1].
    """
    converted = []
    for img in imgs:
        channels_last = img.movedim(0, -1)
        if quant:
            scaled = channels_last * 127.5 + 127.5
            arr = scaled.detach().float().cpu().numpy().clip(0, 255).astype(np.uint8)
        else:
            scaled = channels_last * 0.5 + 0.5
            arr = scaled.detach().float().cpu().numpy().clip(0, 1).astype(np.float32)
        converted.append(arr)
    return converted
192
 
 
193
@torch.inference_mode()
def numpy2pytorch(imgs):
    """Stack HWC uint8 images into a float NCHW tensor.

    Division by 127.0 maps pixel value 127 to exactly 0.0 (so 255 maps
    slightly above 1.0), matching the model's expected normalization.
    """
    batch = np.stack(imgs, axis=0)
    tensor = torch.from_numpy(batch).float() / 127.0 - 1.0
    return tensor.movedim(-1, 1)
198
 
 
199
  def resize_and_center_crop(image, target_width, target_height):
200
  pil_image = Image.fromarray(image)
201
  original_width, original_height = pil_image.size
 
210
  cropped_image = resized_image.crop((left, top, right, bottom))
211
  return np.array(cropped_image)
212
 
 
213
def resize_without_crop(image, target_width, target_height):
    """Resize a numpy image to exactly (target_width, target_height).

    Aspect ratio is not preserved; uses Lanczos resampling.
    """
    pil = Image.fromarray(image)
    resized = pil.resize((target_width, target_height), Image.LANCZOS)
    return np.array(resized)
217
 
 
218
  @torch.inference_mode()
219
  def run_rmbg(img, sigma=0.0):
220
  H, W, C = img.shape
 
229
  result = 127 + (img.astype(np.float32) - 127 + sigma) * alpha
230
  return result.clip(0, 255).astype(np.uint8), alpha
231
 
 
232
  @torch.inference_mode()
233
  def process(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source):
234
  bg_source = BGSource(bg_source)
 
333
 
334
  return pytorch2numpy(pixels)
335
 
 
336
@spaces.GPU
@torch.inference_mode()
def process_relight(input_fg, prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source):
    """Strip the background, translate the Albanian prompt, and relight.

    Returns (preprocessed foreground, rendered results), or (None, None)
    when translation fails.
    """
    # The diffusion model expects English; the UI prompt is Albanian.
    english_prompt = translate_albanian_to_english(prompt)
    if english_prompt.startswith("Përkthimi dështoi"):
        # Translation failure marker — bail out rather than render garbage.
        return None, None
    fg, _matting = run_rmbg(input_fg)
    rendered = process(fg, english_prompt, image_width, image_height, num_samples, seed, steps, a_prompt, n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg_source)
    return fg, rendered
347
 
348
+ # Enum for background source (translated to Albanian)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
class BGSource(Enum):
    """Lighting-direction presets for the initial latent.

    Values are the Albanian labels shown in the UI radio group; `process`
    reconstructs a member from the selected label via `BGSource(bg_source)`.
    """
    NONE = "Asnjë"
    LEFT = "Dritë nga e Majta"
    RIGHT = "Dritë nga e Djathta"
    TOP = "Dritë nga Sipër"
    BOTTOM = "Dritë nga Poshtë"
355
+
356
+ # UI Layout
357
def create_demo():
    """Build and return the Gradio Blocks UI (Albanian labels).

    Layout: custom CSS (top gap, hidden fullscreen/share buttons, enlarged
    download button), an upload column, and a relight button wired to
    `process_relight` with fixed generation settings.
    """
    with gr.Blocks() as block:
        # CSS for 320px gap and download button scaling
        gr.HTML("""
        <style>
        body::before {
            content: "";
            display: block;
            height: 320px;
            background-color: var(--body-background-fill);
        }
        button[aria-label="Fullscreen"], button[aria-label="Fullscreen"]:hover {
            display: none !important;
            visibility: hidden !important;
            opacity: 0 !important;
            pointer-events: none !important;
        }
        button[aria-label="Share"], button[aria-label="Share"]:hover {
            display: none !important;
        }
        button[aria-label="Download"] {
            transform: scale(3);
            transform-origin: top right;
            margin: 0 !important;
            padding: 6px !important;
        }
        </style>
        """)

        with gr.Column():
            input_fg = gr.Image(sources='upload', type="numpy", label="Imazhi i Ngarkuar", height=480)
            prompt = gr.Textbox(label="Përshkrimi", placeholder="Shkruani përshkrimin këtu")
            bg_source = gr.Radio(choices=[e.value for e in BGSource], value=BGSource.NONE.value, label="Preferenca e Ndriçimit", type='value')
            relight_button = gr.Button(value="Rindriço")
            result_gallery = gr.Gallery(label='Rezultatet', visible=False)  # hidden output
            output_bg = gr.Image(type="numpy", label="Parapërpunimi i Planit të Parë", visible=False)  # hidden output

        # BUG FIX: Gradio `inputs` must be components, not raw Python literals;
        # the fixed generation settings are wrapped in gr.State so the click
        # binding is valid. Order matches process_relight's signature:
        # (fg, prompt, width, height, samples, seed, steps, a_prompt,
        #  n_prompt, cfg, highres_scale, highres_denoise, lowres_denoise, bg).
        ips = [
            input_fg, prompt,
            gr.State(512), gr.State(640), gr.State(1), gr.State(12345), gr.State(25),
            gr.State('best quality'),
            gr.State('lowres, bad anatomy, bad hands, cropped, worst quality'),
            gr.State(2), gr.State(1.5), gr.State(0.5), gr.State(0.9),
            bg_source,
        ]
        relight_button.click(fn=process_relight, inputs=ips, outputs=[output_bg, result_gallery])

    return block
399
+
400
+ if __name__ == "__main__":
401
+ print(f"Gradio version: {gr.__version__}")
402
+ app = create_demo()
403
+ app.launch(server_name='0.0.0.0')