dinhvanvu94 commited on
Commit
6a59ab9
·
verified ·
1 Parent(s): d8a0e68

Create app_2511.py

Browse files
Files changed (1) hide show
  1. app_2511.py +247 -0
app_2511.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app_2511.py
2
+ import random
3
+ from typing import Optional, Tuple
4
+
5
+ import gradio as gr
6
+ import numpy as np
7
+ import torch
8
+ from PIL import Image
9
+
10
+ from diffusers import QwenImageEditPlusPipeline
11
+
12
+ # -----------------------------------------------------------------------------
13
 + # Optional: Hugging Face Spaces helpers (only available on the Spaces runtime)
14
+ # -----------------------------------------------------------------------------
15
+ try:
16
+ import spaces # type: ignore
17
+ except Exception:
18
+ spaces = None
19
+
20
+ # -----------------------------------------------------------------------------
21
+ # Global config
22
+ # -----------------------------------------------------------------------------
23
+ dtype = torch.bfloat16
24
+ device = "cuda" if torch.cuda.is_available() else "cpu"
25
+ MAX_SEED = np.iinfo(np.int32).max
26
+
27
+ _pipe: Optional[QwenImageEditPlusPipeline] = None
28
+
29
+
30
def get_pipe() -> QwenImageEditPlusPipeline:
    """
    Build (once) and return the global Qwen-Image-Edit-2511 pipeline with the
    fal "Multiple Angles" LoRA fused in.

    Returns:
        The process-wide ``QwenImageEditPlusPipeline`` singleton. Subsequent
        calls return the already-constructed instance.

    Notes:
        - Not thread-safe: concurrent first calls could both construct a pipe;
          Gradio's default queue serializes inference so this is benign here.
        - The LoRA is fused and then unloaded, so its effect is baked into the
          base weights and cannot be toggled per-request afterwards.
    """
    global _pipe
    # Singleton fast path: reuse the pipeline built by an earlier call.
    if _pipe is not None:
        return _pipe

    pipe = QwenImageEditPlusPipeline.from_pretrained(
        "Qwen/Qwen-Image-Edit-2511",
        torch_dtype=dtype,  # bf16 (module-level `dtype`)
    )

    # Move to GPU before LoRA loading/fusing so those ops run on-device.
    if device == "cuda":
        pipe = pipe.to("cuda")

    # Load LoRA: fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA
    pipe.load_lora_weights(
        "fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA",
        weight_name="qwen-image-edit-2511-multiple-angles-lora.safetensors",
        adapter_name="angles",
    )
    pipe.set_adapters(["angles"], adapter_weights=[1.0])

    # Fuse LoRA for speed. Strength recommended around 0.8–1.0, default 0.9.
    pipe.fuse_lora(adapter_names=["angles"], lora_scale=0.9)
    # Drop the now-redundant adapter tensors; the fused weights remain.
    pipe.unload_lora_weights()

    # Optional AOTI (Spaces only) - keep same pattern; adjust repo if you have a 2511-specific AOTI artifact.
    # NOTE(review): the AOTI artifact below targets "zerogpu-aoti/Qwen-Image",
    # not a 2511-specific build — confirm compatibility before relying on it.
    if spaces is not None and hasattr(spaces, "aoti_blocks_load"):
        try:
            spaces.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/Qwen-Image", variant="fa3")
            print("[INFO] spaces.aoti_blocks_load enabled")
        except Exception as e:
            # Best-effort optimization: never fail pipeline construction over it.
            print(f"[WARN] spaces.aoti_blocks_load failed: {e}")

    _pipe = pipe
    return _pipe
68
+
69
+
70
# -----------------------------------------------------------------------------
# Angle prompt (fal schema)
# -----------------------------------------------------------------------------
# Vocabulary for the fal Multiple-Angles LoRA trigger prompt
# "<sks> [azimuth] [elevation] [distance]". Exact phrasing matters: these
# strings are the tokens the LoRA was trained on — do not edit them.

# Horizontal camera direction, 8 compass-style positions around the subject.
AZIMUTH_CHOICES = [
    "front view",
    "front-right quarter view",
    "right side view",
    "back-right quarter view",
    "back view",
    "back-left quarter view",
    "left side view",
    "front-left quarter view",
]

# Vertical camera angle, 4 levels from below eye level to overhead.
ELEVATION_CHOICES = [
    "low-angle shot",
    "eye-level shot",
    "elevated shot",
    "high-angle shot",
]

# Camera-to-subject distance, 3 framings.
DISTANCE_CHOICES = [
    "close-up",
    "medium shot",
    "wide shot",
]
96
+
97
+
98
def build_fal_angles_prompt(azimuth: str, elevation: str, distance: str) -> str:
    """Assemble the fal Multiple-Angles trigger prompt.

    The LoRA expects exactly: ``<sks> [azimuth] [elevation] [distance]``,
    single-space separated, with the ``<sks>`` trigger token first.
    """
    return " ".join(("<sks>", azimuth, elevation, distance))
101
+
102
+
103
# -----------------------------------------------------------------------------
# Inference
# -----------------------------------------------------------------------------
def infer_angles_edit(
    image: Optional[Image.Image],
    azimuth: str,
    elevation: str,
    distance: str,
    seed: int,
    randomize_seed: bool,
    true_guidance_scale: float,
    num_inference_steps: int,
    height: int,
    width: int,
) -> Tuple[Image.Image, int, str]:
    """
    Run one angle-change edit on `image` with the fused Multiple-Angles LoRA.

    Args:
        image: Input PIL image (converted to RGB before the pipeline call).
        azimuth/elevation/distance: One entry each from the *_CHOICES lists;
            combined into the LoRA trigger prompt.
        seed: RNG seed; ignored and re-drawn when `randomize_seed` is True.
        randomize_seed: Draw a fresh seed in [0, MAX_SEED].
        true_guidance_scale: Passed as `true_cfg_scale` to the pipeline.
        num_inference_steps: Diffusion step count.
        height/width: Output size; values <= 0 are passed as None
            (pipeline then picks its own default — presumably from the
            input image; confirm against diffusers docs).

    Returns:
        (edited image, seed actually used, prompt actually used) — the seed
        and prompt are echoed back so the UI can display them.

    Raises:
        gr.Error: If no input image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    prompt = build_fal_angles_prompt(azimuth, elevation, distance)
    print("Prompt:", prompt)

    pipe = get_pipe()
    # Seed on the same device the pipeline runs on for reproducible outputs.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    out = pipe(
        image=[image.convert("RGB")],
        prompt=prompt,
        height=height if height and height > 0 else None,
        width=width if width and width > 0 else None,
        num_inference_steps=int(num_inference_steps),
        true_cfg_scale=float(true_guidance_scale),
        guidance_scale=1.0,
        # Single space (not empty) as the negative prompt — deliberate:
        # keeps true-CFG active with a neutral negative.
        negative_prompt=" ",
        generator=generator,
        num_images_per_prompt=1,
    ).images[0]

    return out, seed, prompt
144
+
145
+
146
# -----------------------------------------------------------------------------
# UI (Gradio 6.3.0 compatible)
# -----------------------------------------------------------------------------
# Static placeholder shown in the "3D Camera Preview" tab. Purely cosmetic:
# the interactive JS<->Python camera sync was removed for Gradio 6, so the
# camera is driven by the dropdowns instead.
CAMERA_3D_HTML_TEMPLATE = """
<div style="width:100%;height:400px;background:#1a1a1a;border-radius:12px;
display:flex;align-items:center;justify-content:center;color:#aaa;">
<div style="text-align:center;">
<div style="font-size:18px;color:#00ff88;font-weight:700;">3D Camera Preview</div>
<div style="margin-top:8px;">(Gradio 6) JS ↔ Python sync removed</div>
<div style="margin-top:6px;">Use dropdowns to control camera</div>
</div>
</div>
"""

# Keeps the preview tab from collapsing while the HTML placeholder loads.
css = """
#camera-3d-control { min-height: 400px; }
"""
163
+
164
# Gradio Blocks layout: input image + camera controls on the left,
# result + advanced sampling settings on the right.
with gr.Blocks(css=css) as demo:
    gr.Markdown("## 🎬 Qwen Image Edit — 2511 + Multiple Angles LoRA (UI only)")

    with gr.Row():
        with gr.Column(scale=1):
            image = gr.Image(label="Input Image", type="pil", height=280)

            # Static placeholder only — see CAMERA_3D_HTML_TEMPLATE.
            with gr.Tab("🎮 3D Camera Preview"):
                _ = gr.HTML(value=CAMERA_3D_HTML_TEMPLATE, elem_id="camera-3d-control")

            # The actual camera controls: one dropdown per fal prompt slot.
            with gr.Tab("🎚️ Angles (fal schema)"):
                azimuth = gr.Dropdown(
                    choices=AZIMUTH_CHOICES,
                    value="front view",
                    label="Azimuth (8 directions)",
                )
                elevation = gr.Dropdown(
                    choices=ELEVATION_CHOICES,
                    value="eye-level shot",
                    label="Elevation (4 levels)",
                )
                distance = gr.Dropdown(
                    choices=DISTANCE_CHOICES,
                    value="medium shot",
                    label="Distance (3 levels)",
                )

            run_btn = gr.Button("🚀 Generate", variant="primary")

        with gr.Column(scale=1):
            result = gr.Image(label="Output Image", height=350)
            prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)

            with gr.Accordion("⚙️ Advanced", open=False):
                seed = gr.Slider(0, MAX_SEED, step=1, label="Seed", value=0)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)

                # 2511 edits usually run well with 8–16 steps and a
                # true_cfg_scale around 3–6 (image-dependent).
                true_guidance_scale = gr.Slider(1.0, 10.0, step=0.1, label="True CFG Scale", value=4.0)
                num_inference_steps = gr.Slider(1, 40, step=1, label="Steps", value=12)

                height = gr.Slider(256, 2048, step=8, value=1024, label="Height")
                width = gr.Slider(256, 2048, step=8, value=1024, label="Width")

    # (Optional) Quick tips
    gr.Markdown(
        "- Prompt format used: `<sks> azimuth elevation distance`\n"
        "- Nếu ảnh bị méo: giảm **True CFG Scale** hoặc tăng **Steps** nhẹ.\n"
        "- Nếu đổi góc chưa mạnh: tăng **True CFG Scale** một chút (không cần tăng LoRA strength)."
    )

    # Wire the button: inputs mirror infer_angles_edit's signature order;
    # the returned seed is written back to the seed slider so the user can
    # see (and reuse) the value actually used.
    run_btn.click(
        fn=infer_angles_edit,
        inputs=[
            image,
            azimuth,
            elevation,
            distance,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            height,
            width,
        ],
        outputs=[result, seed, prompt_preview],
        # No API needed yet -> don't set api_name.
    )
232
+
233
if __name__ == "__main__":
    # Bound the request queue so a burst of users can't pile up unbounded work.
    demo.queue(max_size=16)
    demo.launch(
        # NOTE(review): 127.0.0.1 is local-only; a Hugging Face Space needs
        # server_name="0.0.0.0" to be reachable — confirm deployment target.
        server_name="127.0.0.1",
        server_port=7861,
        share=False,
        debug=True,
        show_error=True,
        # Return immediately instead of blocking; we keep the process alive
        # ourselves below.
        prevent_thread_lock=True,
    )

    # Keep the process alive when run as `python app_2511.py` (launch()
    # returned immediately because prevent_thread_lock=True). Handle Ctrl+C
    # explicitly so the server is shut down cleanly instead of dying with an
    # unhandled KeyboardInterrupt traceback.
    import time as _time

    try:
        while True:
            _time.sleep(3600)
    except KeyboardInterrupt:
        print("[INFO] Shutting down Gradio server...")
        demo.close()