Alexander Bagus committed on
Commit
3f4f137
·
1 Parent(s): 4b60de2
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -6,9 +6,7 @@ from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
6
  from transformers import AutoTokenizer, Qwen3ForCausalLM
7
  from safetensors.torch import load_file
8
  from diffusers import AutoencoderKL
9
- from utils.image_utils import get_image_latent, rescale_image
10
- from utils.prompt_utils import polish_prompt
11
- from utils import repo_utils
12
  from controlnet_aux.processor import Processor
13
  from omegaconf import OmegaConf
14
 
@@ -18,6 +16,7 @@ repo_utils.move_folder("repos/VideoX-Fun/videox_fun", "videox_fun")
18
  repo_utils.move_folder("repos/VideoX-Fun/config", "config")
19
  from videox_fun.pipeline import ZImageControlPipeline
20
  from videox_fun.models import ZImageControlTransformer2DModel
 
21
 
22
  #clone models
23
  repo_utils.clone_repo_if_not_exists("https://huggingface.co/Tongyi-MAI/Z-Image-Turbo", "models")
@@ -80,13 +79,9 @@ pipe = ZImageControlPipeline(
80
  pipe.to("cuda", torch.bfloat16)
81
  print("pipe ready.")
82
 
83
- # ======== AoTI compilation + FA3 ========
84
- # pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
85
- # spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")
86
-
87
  def prepare(prompt, is_polish_prompt):
88
  if not is_polish_prompt: return prompt, False
89
- polished_prompt = polish_prompt(prompt)
90
  return polished_prompt, True
91
 
92
  @spaces.GPU
@@ -128,7 +123,7 @@ def inference(
128
  processor = Processor(processor_id)
129
 
130
  # Width must be divisible by 16
131
- control_image, width, height = rescale_image(input_image, image_scale, 16)
132
  control_image = control_image.resize((1024, 1024))
133
 
134
  print("DEBUG: control_image_torch")
@@ -192,11 +187,19 @@ with gr.Blocks(css=css) as demo:
192
  gr.HTML(read_file("static/header.html"))
193
  with gr.Row():
194
  with gr.Column():
 
 
 
 
 
 
 
195
  input_image = gr.Image(
196
- height=290, sources=['upload', 'clipboard'],
 
197
  image_mode='RGB',
198
- # elem_id="image_upload",
199
- type="pil", label="Upload")
200
 
201
  prompt = gr.Textbox(
202
  label="Prompt",
@@ -273,6 +276,7 @@ with gr.Blocks(css=css) as demo:
273
  gr.Examples(examples=examples, inputs=[input_image, prompt, control_mode])
274
  gr.Markdown(read_file("static/footer.md"))
275
 
 
276
  run_button.click(
277
  fn=prepare,
278
  inputs=[prompt, is_polish_prompt],
 
6
  from transformers import AutoTokenizer, Qwen3ForCausalLM
7
  from safetensors.torch import load_file
8
  from diffusers import AutoencoderKL
9
+ from utils import repo_utils, prompt_utils, image_utils
 
 
10
  from controlnet_aux.processor import Processor
11
  from omegaconf import OmegaConf
12
 
 
16
  repo_utils.move_folder("repos/VideoX-Fun/config", "config")
17
  from videox_fun.pipeline import ZImageControlPipeline
18
  from videox_fun.models import ZImageControlTransformer2DModel
19
+ from videox_fun.utils.utils import get_image_latent
20
 
21
  #clone models
22
  repo_utils.clone_repo_if_not_exists("https://huggingface.co/Tongyi-MAI/Z-Image-Turbo", "models")
 
79
  pipe.to("cuda", torch.bfloat16)
80
  print("pipe ready.")
81
 
 
 
 
 
82
  def prepare(prompt, is_polish_prompt):
83
  if not is_polish_prompt: return prompt, False
84
+ polished_prompt = prompt_utils.polish_prompt(prompt)
85
  return polished_prompt, True
86
 
87
  @spaces.GPU
 
123
  processor = Processor(processor_id)
124
 
125
  # Width must be divisible by 16
126
+ control_image, width, height = image_utils.rescale_image(input_image, image_scale, 16)
127
  control_image = control_image.resize((1024, 1024))
128
 
129
  print("DEBUG: control_image_torch")
 
187
  gr.HTML(read_file("static/header.html"))
188
  with gr.Row():
189
  with gr.Column():
190
+ mask_image = gr.ImagePaint(
191
+ height=290,
192
+ sources=['upload', 'clipboard'],
193
+ image_mode='RGB',
194
+ type="pil", label="Mask Image"
195
+ )
196
+
197
  input_image = gr.Image(
198
+ height=290,
199
+ sources=['upload', 'clipboard'],
200
  image_mode='RGB',
201
+ type="pil", label="Upload"
202
+ )
203
 
204
  prompt = gr.Textbox(
205
  label="Prompt",
 
276
  gr.Examples(examples=examples, inputs=[input_image, prompt, control_mode])
277
  gr.Markdown(read_file("static/footer.md"))
278
 
279
+ mask_image.upload(fn=lambda x: x, inputs=[mask_image], outputs=[input_image])
280
  run_button.click(
281
  fn=prepare,
282
  inputs=[prompt, is_polish_prompt],