Alexander Bagus committed on
Commit
371c0ec
·
1 Parent(s): bb2d84c
Files changed (2) hide show
  1. app.py +51 -50
  2. examples/0_examples.json +7 -6
app.py CHANGED
@@ -1,65 +1,47 @@
1
  import gradio as gr
2
  import numpy as np
3
  import torch, random, json, spaces, time
4
- # from safetensors.torch import load_file
5
- # from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
6
- # from videox_fun.pipeline import ZImageControlPipeline
7
- # from videox_fun.models import ZImageControlTransformer2DModel
8
- # from transformers import AutoTokenizer, Qwen3ForCausalLM
9
- # from diffusers import AutoencoderKL
10
- # from controlnet_aux.processor import Processor
11
- from utils import repo_utils, image_utils, prompt_utils
12
 
13
 
14
- repo_utils.clone_repo_if_not_exists("https://github.com/apple/ml-starflow.git", "app/models")
15
- repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")
16
-
17
- # MODEL_PATH = "models/Z-Image-Turbo/"
18
- # CONTROLNET_PATH = "models/Z-Image-Turbo-Fun-Controlnet-Union/Z-Image-Turbo-Fun-Controlnet-Union.safetensors"
19
 
20
  DTYPE = torch.bfloat16
21
  MAX_SEED = np.iinfo(np.int32).max
22
 
23
- # # load transformer
24
- # transformer = ZImageControlTransformer2DModel.from_pretrained(
25
- # MODEL_PATH,
26
- # subfolder="transformer",
27
- # transformer_additional_kwargs={
28
- # "control_layers_places": [0, 5, 10, 15, 20, 25],
29
- # "control_in_dim": 16
30
- # },
31
- # torch_dtype= DTYPE
32
- # ).to("cuda")
33
-
34
- # ## Load controlnet
35
- # state_dict = load_file(CONTROLNET_PATH)
36
- # state_dict = state_dict["state_dict"] if "state_dict" in state_dict else state_dict
37
- # m, u = transformer.load_state_dict(state_dict, strict=False)
38
- # print(f"missing keys: {len(m)}, unexpected keys: {len(u)}")
39
-
40
- # # load ZImageControlPipeline
41
- # vae = AutoencoderKL.from_pretrained(
42
- # MODEL_PATH,
43
- # subfolder="vae",
44
- # device_map="cuda",
45
- # torch_dtype= DTYPE
46
- # )
47
 
48
- # tokenizer = AutoTokenizer.from_pretrained(
49
- # MODEL_PATH,
50
- # subfolder="tokenizer"
51
- # )
 
 
 
 
 
 
 
 
52
 
53
- # text_encoder = Qwen3ForCausalLM.from_pretrained(
54
- # MODEL_PATH,
55
- # subfolder="text_encoder",
56
- # torch_dtype=DTYPE,
57
- # )
58
 
59
- # scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
60
- # MODEL_PATH,
61
- # subfolder="scheduler"
62
- # )
63
 
64
  # pipe = ZImageControlPipeline(
65
  # vae=vae,
@@ -90,6 +72,25 @@ def inference(
90
  timestamp = time.time()
91
  print(f"timestamp: {timestamp}")
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # # process image
94
  # print("DEBUG: process image")
95
  # if input_image is None:
 
1
  import gradio as gr
2
  import numpy as np
3
  import torch, random, json, spaces, time
4
+ from diffsynth.pipelines.qwen_image import (
5
+ QwenImagePipeline, ModelConfig,
6
+ QwenImageUnit_Image2LoRAEncode, QwenImageUnit_Image2LoRADecode
7
+ )
8
+ from safetensors.torch import save_file
9
+ import torch
10
+ from PIL import Image
11
+ # from utils import repo_utils, image_utils, prompt_utils
12
 
13
 
14
+ # repo_utils.clone_repo_if_not_exists("https://github.com/apple/ml-starflow.git", "app/models")
15
+ # repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")
 
 
 
16
 
17
  DTYPE = torch.bfloat16
18
  MAX_SEED = np.iinfo(np.int32).max
19
 
20
+ vram_config_disk_offload = {
21
+ "offload_dtype": "disk",
22
+ "offload_device": "disk",
23
+ "onload_dtype": "disk",
24
+ "onload_device": "disk",
25
+ "preparing_dtype": torch.bfloat16,
26
+ "preparing_device": "cuda",
27
+ "computation_dtype": torch.bfloat16,
28
+ "computation_device": "cuda",
29
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # Load models
32
+ pipe = QwenImagePipeline.from_pretrained(
33
+ torch_dtype=torch.bfloat16,
34
+ device="cuda",
35
+ model_configs=[
36
+ ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors", **vram_config_disk_offload),
37
+ ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors", **vram_config_disk_offload),
38
+ ModelConfig(model_id="DiffSynth-Studio/Qwen-Image-i2L", origin_file_pattern="Qwen-Image-i2L-Style.safetensors", **vram_config_disk_offload),
39
+ ],
40
+ processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="processor/"),
41
+ vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
42
+ )
43
 
 
 
 
 
 
44
 
 
 
 
 
45
 
46
  # pipe = ZImageControlPipeline(
47
  # vae=vae,
 
72
  timestamp = time.time()
73
  print(f"timestamp: {timestamp}")
74
 
75
+ # Load images
76
+ images = [
77
+ Image.open("examples/style/1/0.jpg"),
78
+ Image.open("examples/style/1/1.jpg"),
79
+ Image.open("examples/style/1/2.jpg"),
80
+ Image.open("examples/style/1/3.jpg"),
81
+ Image.open("examples/style/1/4.jpg"),
82
+ ]
83
+
84
+
85
+ # Model inference
86
+ with torch.no_grad():
87
+ embs = QwenImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
88
+ lora = QwenImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
89
+
90
+ save_file(lora, "model_style.safetensors")
91
+
92
+ return True
93
+
94
  # # process image
95
  # print("DEBUG: process image")
96
  # if input_image is None:
examples/0_examples.json CHANGED
@@ -1,7 +1,8 @@
1
- [
2
- ["examples/pose2.jpg", "Woman wearing jeans and tanktop, from dubai", "HED"],
3
- ["examples/bottle.jpg", "A man holding a bottle", "HED"],
4
- ["examples/room.jpg", "modern architecture, living room", "Depth"],
5
- ["examples/pose1.jpg", "A female paladin. Mountain background.", "Pose"],
6
- ["examples/bird.jpg", "A bird sitting on a branch, cartoon.", "Canny"]
 
7
  ]
 
1
+ [
2
+ "Saitama, punching",
3
+ "Levi Ackerman, kick",
4
+ "Goku, powering up",
5
+ "Light Yagami, writing",
6
+ "Naruto, eating ramen",
7
+ "Luffy, grinning"
8
  ]