Spaces: Running on Zero

Alexander Bagus committed
Commit · 371c0ec
Parent(s): bb2d84c

Browse files:
- app.py (+51 -50)
- examples/0_examples.json (+7 -6)
app.py CHANGED
@@ -1,65 +1,47 @@
 import gradio as gr
 import numpy as np
 import torch, random, json, spaces, time
-
-
-
-
-
-
-
-from utils import repo_utils, image_utils, prompt_utils
+from diffsynth.pipelines.qwen_image import (
+    QwenImagePipeline, ModelConfig,
+    QwenImageUnit_Image2LoRAEncode, QwenImageUnit_Image2LoRADecode
+)
+from safetensors.torch import save_file
+import torch
+from PIL import Image
+# from utils import repo_utils, image_utils, prompt_utils
 
 
-repo_utils.clone_repo_if_not_exists("https://github.com/apple/ml-starflow.git", "app/models")
-repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")
-
-# MODEL_PATH = "models/Z-Image-Turbo/"
-# CONTROLNET_PATH = "models/Z-Image-Turbo-Fun-Controlnet-Union/Z-Image-Turbo-Fun-Controlnet-Union.safetensors"
+# repo_utils.clone_repo_if_not_exists("https://github.com/apple/ml-starflow.git", "app/models")
+# repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")
 
 DTYPE = torch.bfloat16
 MAX_SEED = np.iinfo(np.int32).max
 
-
-
-
-
-
-
-
-
-
-
-
-# ## Load controlnet
-# state_dict = load_file(CONTROLNET_PATH)
-# state_dict = state_dict["state_dict"] if "state_dict" in state_dict else state_dict
-# m, u = transformer.load_state_dict(state_dict, strict=False)
-# print(f"missing keys: {len(m)}, unexpected keys: {len(u)}")
-
-# # load ZImageControlPipeline
-# vae = AutoencoderKL.from_pretrained(
-#     MODEL_PATH,
-#     subfolder="vae",
-#     device_map="cuda",
-#     torch_dtype= DTYPE
-# )
+vram_config_disk_offload = {
+    "offload_dtype": "disk",
+    "offload_device": "disk",
+    "onload_dtype": "disk",
+    "onload_device": "disk",
+    "preparing_dtype": torch.bfloat16,
+    "preparing_device": "cuda",
+    "computation_dtype": torch.bfloat16,
+    "computation_device": "cuda",
+}
 
-#
-
-
-
+# Load models
+pipe = QwenImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors", **vram_config_disk_offload),
+        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors", **vram_config_disk_offload),
+        ModelConfig(model_id="DiffSynth-Studio/Qwen-Image-i2L", origin_file_pattern="Qwen-Image-i2L-Style.safetensors", **vram_config_disk_offload),
+    ],
+    processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="processor/"),
+    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
+)
 
-# text_encoder = Qwen3ForCausalLM.from_pretrained(
-#     MODEL_PATH,
-#     subfolder="text_encoder",
-#     torch_dtype=DTYPE,
-# )
 
-# scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
-#     MODEL_PATH,
-#     subfolder="scheduler"
-# )
 
 # pipe = ZImageControlPipeline(
 #     vae=vae,
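The new `vram_limit` argument is derived from `torch.cuda.mem_get_info`, which returns a `(free_bytes, total_bytes)` tuple; the commit indexes element 1 (total device memory) and subtracts 0.5 GB of headroom. A minimal standalone sketch of what that expression computes, outside the Space:

import torch

# torch.cuda.mem_get_info(device) returns (free_bytes, total_bytes).
free_bytes, total_bytes = torch.cuda.mem_get_info("cuda")

# The commit budgets against *total* memory minus 0.5 GB of headroom;
# indexing [0] instead would budget against currently *free* memory.
vram_limit_gb = total_bytes / (1024 ** 3) - 0.5
print(f"vram_limit passed to the pipeline: {vram_limit_gb:.2f} GB")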
@@ -90,6 +72,25 @@ def inference(
     timestamp = time.time()
     print(f"timestamp: {timestamp}")
 
+    # Load images
+    images = [
+        Image.open("examples/style/1/0.jpg"),
+        Image.open("examples/style/1/1.jpg"),
+        Image.open("examples/style/1/2.jpg"),
+        Image.open("examples/style/1/3.jpg"),
+        Image.open("examples/style/1/4.jpg"),
+    ]
+
+
+    # Model inference
+    with torch.no_grad():
+        embs = QwenImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
+        lora = QwenImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
+
+    save_file(lora, "model_style.safetensors")
+
+    return True
+
     # # process image
     # print("DEBUG: process image")
     # if input_image is None:
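The rewritten `inference` encodes the five reference images into a style LoRA and writes it out with `safetensors`. A quick sanity check on the produced artifact (a sketch independent of this commit; `load_file` is the read-side counterpart of the `save_file` call above):

from safetensors.torch import load_file

# Reload the LoRA that inference() wrote and inspect a few tensors.
lora_state = load_file("model_style.safetensors")
print(f"{len(lora_state)} tensors in the LoRA state dict")
for name, tensor in list(lora_state.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)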
examples/0_examples.json CHANGED
@@ -1,7 +1,8 @@
-
-
-
-
-
-
+[
+    "Saitama, punching",
+    "Levi Ackerman, kick",
+    "Goku, powering up",
+    "Light Yagami, writing",
+    "Naruto, eating ramen",
+    "Luffy, grinning"
 ]
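The examples file now holds plain prompt strings. A minimal sketch (assumed, not part of this commit) of how such a list is commonly wired into a Gradio UI like this Space's; the component names here are illustrative, not taken from app.py:

import json
import gradio as gr

# Load the prompt list shipped with the Space.
with open("examples/0_examples.json") as f:
    prompts = json.load(f)

# Hypothetical wiring: one-column examples feeding a prompt textbox.
with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    gr.Examples(examples=[[p] for p in prompts], inputs=[prompt])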