# app.py — Hugging Face Space source uploaded by tori29umai
# (commit 7d2e3d8, "Update app.py", verified; file size 18.4 kB).
import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler
from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
import math
# --- Model Loading ---
# Everything in this section runs at import time and is order-dependent:
# the pipeline is created, each LoRA is fused right after it is loaded, the
# transformer class / attention processor are swapped, and only then is the
# pipeline handed to optimize_pipeline_.
dtype = torch.bfloat16
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Scheduler configuration. base_shift and max_shift are both log(3), so the
# dynamic shift presumably comes out the same for every image sequence
# length — TODO confirm against FlowMatchEulerDiscreteScheduler.
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": math.log(3),
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": math.log(3),
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": None,
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
# Load the base editing pipeline with the custom scheduler and move it to `device`.
pipe = QwenImageEditPlusPipeline.from_pretrained(
"Qwen/Qwen-Image-Edit-2509",
scheduler=scheduler,
torch_dtype=dtype
).to(device)
# First LoRA, fused at scale 0.7. Judging by the weight file name this is a
# 4-step "Lightning" LoRA (DEFAULT_NUM_INFERENCE_STEPS is 4) — confirm with
# the model card.
pipe.load_lora_weights(
"2vXpSwA7/iroiro-lora",
weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
)
pipe.fuse_lora(lora_scale=0.7)
# Second LoRA, fused at full scale. The weight file name means "multiple
# angles"; presumably this is what makes the camera prompts work.
pipe.load_lora_weights(
"dx8152/Qwen-Edit-2509-Multiple-angles",
weight_name="多角度.safetensors"
)
pipe.fuse_lora(lora_scale=1.0)
# Rebind the already-loaded transformer instance to the project-local
# QwenImageTransformer2DModel class, then install the FA3 attention processor
# on it. NOTE(review): presumably the processor requires the local class.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
# optimize_pipeline_ lives in the project's `optimization` module (not shown
# here). It is called with two blank 1024x1024 RGB images and a dummy prompt,
# presumably as example inputs for warm-up/compilation — TODO confirm.
optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
# --- Constants ---
# Largest signed 32-bit integer; upper bound of the seed slider and of
# randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max
# Internal defaults (also used as the initial values of the advanced-settings
# accordion and as the keyword defaults of generate_from_dropdown).
DEFAULT_SEED = 0
DEFAULT_RANDOMIZE = True
DEFAULT_TRUE_GUIDANCE_SCALE = 1.0
DEFAULT_NUM_INFERENCE_STEPS = 4
# Camera options (the value sent to the model is always the 'cn' string).
# Each entry holds the Chinese prompt under "cn" plus a Japanese ("ja") and
# an English ("en") description used for the dropdown labels; the entries
# have no "zh" key.
CAMERA_OPTIONS = [
{"cn": "镜头方向左回转45度", "ja": "カメラを左に45度回転", "en": "Rotate camera 45° left"},
{"cn": "镜头向右回转45度", "ja": "カメラを右に45度回転", "en": "Rotate camera 45° right"},
{"cn": "镜头方向左回转90度", "ja": "カメラを左に90度回転", "en": "Rotate camera 90° left"},
{"cn": "镜头向右回转90度", "ja": "カメラを右に90度回転", "en": "Rotate camera 90° right"},
{"cn": "将镜头转为俯视", "ja": "カメラを上から見下ろす視点に切り替える", "en": "Switch to top-down view"},
{"cn": "将镜头转为仰视", "ja": "カメラを下から見上げる視点に切り替える", "en": "Switch to low-angle view"},
{"cn": "镜头转相机平面视", "ja": "カメラを平面視に切り替える", "en": "Switch to orthographic view"},
{"cn": "将镜头转为特写镜头", "ja": "カメラをクローズアップに切り替える", "en": "Switch to close-up lens"},
{"cn": "将镜头转为中近景镜头", "ja": "カメラをややクローズアップに切り替える", "en": "Switch to medium close-up lens"},
{"cn": "镜头转为广角镜头", "ja": "カメラをズームアウトに切り替える", "en": "Switch to wide-angle lens"},
{"cn": "拉远镜头以拍摄被摄体全景", "ja": "被写体の全容を映すようにカメラを引く", "en": "Pull back the camera to capture the whole subject"},
{"cn": "将镜头移动到被摄体正面", "ja": "カメラを被写体の正面に移動する", "en": "Move the camera to the front of the subject"},
{"cn": "将镜头移动到被摄体背后", "ja": "カメラを被写体の背面に移動する", "en": "Move camera behind the subject"},
]
# Free-text ("custom") option, with one dropdown label per UI language.
# CUSTOM_OPTION_VALUE is the sentinel dropdown value meaning "use the text
# typed into the custom prompt box instead of a preset".
CUSTOM_OPTION_VALUE = "__custom__"
CUSTOM_LABELS = {
"en": "Custom (enter Chinese prompt)",
"ja": "自由入力(中国語で入力)",
"zh": "自定义(用中文输入)",
}
# i18n dictionary: maps a UI string key to its text in each supported UI
# language ("en", "ja", "zh"). Every entry provides all three languages.
I18N = {
"title": {
"en": "Camera Work ",
"ja": "カメラワーク",
"zh": "镜头控制",
},
"notice": {
"en": "Note: Please avoid uploading images created by others. There may be rights infringements.",
"ja": "注意:他者が作成した画像のアップロードはご遠慮ください。権利侵害の可能性があります。",
"zh": "注意:请勿上传他人创作的图片,可能涉及权利侵害。",
},
"input_image": {"en": "Input image", "ja": "入力画像", "zh": "输入图像"},
"dropdown_label": {
"en": "Camera work (label shows CN + selected language)",
"ja": "カメラワーク(表示は 中国語+選択言語)",
"zh": "镜头操作(显示为 中文+所选语言)",
},
"custom_cn_label": {
"en": "Custom Chinese prompt",
"ja": "自由入力の中国語プロンプト",
"zh": "自定义中文提示词",
},
"custom_cn_ph": {
"en": "e.g., 将镜头转为斜俯视 并 拉远镜头",
"ja": "例: 将镜头转为斜俯视 并 拉远镜头",
"zh": "例如:将镜头转为斜俯视 并 拉远镜头",
},
"extra_label": {
"en": "Extra prompt (optional, appended at end)",
"ja": "追加プロンプト(任意・末尾に付加)",
"zh": "附加提示词(可选,追加在末尾)",
},
"extra_ph": {
"en": "e.g., high detail, soft lighting, anime style, 4k",
"ja": "例: high detail, soft lighting, anime style, 4k",
"zh": "例如:high detail, soft lighting, anime style, 4k",
},
"accordion": {"en": "Show advanced settings", "ja": "詳細設定を開く", "zh": "展开高级设置"},
"seed": {"en": "Seed", "ja": "Seed", "zh": "Seed"},
"rand": {"en": "Randomize seed", "ja": "ランダムシード", "zh": "随机种子"},
"tgs": {"en": "True guidance scale", "ja": "True guidance scale", "zh": "True guidance scale"},
"steps": {"en": "Steps", "ja": "生成ステップ数", "zh": "生成步数"},
"run": {"en": "Generate", "ja": "生成", "zh": "生成"},
"sel_cn": {
"en": "Selected camera prompt (to be sent, Chinese)",
"ja": "選択中のカメラプロンプト(送信対象・中国語)",
"zh": "所选镜头提示(发送内容,中文)",
},
"final_prev": {
"en": "Final prompt to be sent (Chinese + extra)",
"ja": "最終的に送信されるプロンプト(中国語+追記)",
"zh": "最终发送的提示(中文+附加)",
},
"output": {"en": "Output image", "ja": "出力画像", "zh": "输出图像"},
"status": {"en": "Status", "ja": "ステータス", "zh": "状态"},
"sent_hdr": {"en": "**Prompt sent (for reference)**", "ja": "**送信プロンプト(確認用)**", "zh": "**已发送提示(供参考)**"},
# Status and error messages returned by generate_from_dropdown via t().
"status_ok": {
"en": "✅ Generated 1 image with Chinese prompt (PNG).",
"ja": "✅ 中国語プロンプトで1枚生成しました(PNG)。",
"zh": "✅ 使用中文提示生成了 1 张图片(PNG)。",
},
"err_no_img": {
"en": "Error: Please upload an input image.",
"ja": "エラー: 入力画像をアップロードしてください",
"zh": "错误:请先上传输入图像。",
},
"err_no_custom": {
"en": "Error: Please enter a custom Chinese prompt.",
"ja": "エラー: 自由入力の中国語プロンプトを入力してください",
"zh": "错误:请输入自定义中文提示词。",
},
"lang_label": {"en": "UI Language", "ja": "UI言語", "zh": "界面语言"},
}
def t(key, lang):
    """Return the UI string stored under ``key`` for the language ``lang``.

    Args:
        key: Name of an entry in ``I18N`` (e.g. ``"status_ok"``).
        lang: UI language code; ``"en"``, ``"ja"`` and ``"zh"`` are defined.

    Returns:
        The translated string. An unknown language code falls back to the
        English text instead of raising, matching the fallback that
        ``build_dropdown_choices`` applies.

    Raises:
        KeyError: If ``key`` is not present in ``I18N``.
    """
    translations = I18N[key]
    if lang in translations:
        return translations[lang]
    # Unknown language (e.g. passed by an API client): use English.
    return translations["en"]
def build_dropdown_choices(lang):
    """Build the ``(label, value)`` pairs for the camera-work dropdown.

    The value of every preset is its Chinese prompt; the label shows that
    prompt followed by its description in the selected language. The last
    choice is the free-text option, whose value is ``CUSTOM_OPTION_VALUE``.

    Args:
        lang: UI language code. Anything other than ``"en"``, ``"ja"`` or
            ``"zh"`` is treated as ``"en"``.

    Returns:
        A list of ``(label, value)`` tuples.
    """
    if lang not in ("en", "ja", "zh"):
        lang = "en"
    choices = []
    for item in CAMERA_OPTIONS:
        cn_prompt = item["cn"]
        # CAMERA_OPTIONS entries only carry "cn"/"ja"/"en" texts, so a plain
        # item[lang] raised KeyError for "zh". The prompt is already Chinese,
        # so in that case it is shown on its own.
        description = item.get(lang)
        label = f"{cn_prompt}, {description}" if description else cn_prompt
        choices.append((label, cn_prompt))
    choices.append((CUSTOM_LABELS[lang], CUSTOM_OPTION_VALUE))
    return choices
def _append_prompt(base: str, extra: str) -> str:
extra = (extra or "").strip()
return (base if not extra else f"{base} {extra}").strip()
def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    """Run the editing pipeline once and return the first generated image.

    Args:
        input_images: Images handed to the pipeline as ``image``; an empty or
            ``None`` value is forwarded as ``None``.
        prompt: Full text prompt.
        seed: Seed for the torch random generator created on ``device``.
        num_inference_steps: Number of denoising steps.
        true_guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    # UI sliders and API clients can deliver whole numbers as floats (4.0);
    # torch.Generator.manual_seed only accepts ints and a step count has to
    # be integral, so both are coerced here.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    output = pipe(
        image=input_images or None,
        prompt=prompt,
        negative_prompt=" ",
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    )
    return output.images[0]
@spaces.GPU()
def generate_from_dropdown(
    image,
    dropdown_value_cn,
    custom_cn,
    extra_prompt="",
    seed=DEFAULT_SEED,
    randomize_seed=DEFAULT_RANDOMIZE,
    true_guidance_scale=DEFAULT_TRUE_GUIDANCE_SCALE,
    num_inference_steps=DEFAULT_NUM_INFERENCE_STEPS,
    lang="en",
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image from the uploaded image and the chosen camera prompt.

    Args:
        image: Uploaded image, either a PIL image or something
            ``Image.open`` accepts; ``None`` yields an error status.
        dropdown_value_cn: Selected dropdown value, i.e. a Chinese preset
            prompt or ``CUSTOM_OPTION_VALUE``.
        custom_cn: Free-text prompt, used only for ``CUSTOM_OPTION_VALUE``.
        extra_prompt: Optional text appended to the prompt.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: Draw a random seed in ``[0, MAX_SEED]`` instead.
        true_guidance_scale: Passed on to ``generate_single_view``.
        num_inference_steps: Passed on to ``generate_single_view``.
        lang: UI language used for status and progress texts.
        progress: Gradio progress tracker.

    Returns:
        ``(image_or_None, status_message, prompt_sent)``; on a validation
        error the image is ``None`` and the prompt is ``""``.
    """
    # The seed is drawn before any validation, whenever randomisation is on.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if image is None:
        return None, t("err_no_img", lang), ""

    source = image if isinstance(image, Image.Image) else Image.open(image)
    pil_images = [source.convert("RGB")]

    if dropdown_value_cn != CUSTOM_OPTION_VALUE:
        # A missing selection falls back to the first preset.
        base_cn = dropdown_value_cn or CAMERA_OPTIONS[0]["cn"]
    else:
        base_cn = (custom_cn or "").strip()
        if not base_cn:
            return None, t("err_no_custom", lang), ""

    final_prompt = _append_prompt(base_cn, extra_prompt)

    # Progress captions: English, Japanese, and Chinese for everything else.
    if lang == "en":
        busy_desc, done_desc = "Generating...", "Done"
    elif lang == "ja":
        busy_desc, done_desc = "生成中...", "完了"
    else:
        busy_desc, done_desc = "生成中...", "完成"

    progress(0.6, desc=busy_desc)
    out = generate_single_view(pil_images, final_prompt, seed, num_inference_steps, true_guidance_scale)
    progress(1.0, desc=done_desc)
    return out, t("status_ok", lang), final_prompt
# --- UI ---
# Stylesheet passed to gr.Blocks. The selectors match identifiers used in the
# layout: "#app-wrap" is the elem_id of the main column, ".notice" wraps the
# notice HTML, ".card" is the elem_class of the settings and result columns,
# and ".preview"/".small" are the elem_classes of the sent-prompt textbox.
css = """
#app-wrap {margin: 0 auto; max-width: 1200px;}
.notice {
background: #fff8e1;
border: 1px solid #facc15;
color: #713f12;
padding: 12px 14px;
border-radius: 12px;
font-weight: 600;
line-height: 1.5;
margin-bottom: 10px;
}
.card {
background: white;
border: 1px solid #e5e7eb;
border-radius: 14px;
padding: 14px;
box-shadow: 0 1px 2px rgba(0,0,0,0.04);
}
.small { font-size: 12px; color: #6b7280; }
.preview {
background: #f9fafb;
border: 1px dashed #cbd5e1;
border-radius: 10px;
padding: 8px 10px;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
white-space: pre-wrap;
}
"""
# Gradio UI: language selector, input image + camera settings, result panel.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the statement order; confirm it matches the intended layout.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    # Language selector (English by default)
    lang_selector = gr.Radio(
        label=I18N["lang_label"]["en"],
        choices=[("English", "en"), ("日本語", "ja"), ("中文", "zh")],
        value="en",
        interactive=True,
    )
    title_md = gr.Markdown(I18N["title"]["en"])

    with gr.Column(elem_id="app-wrap"):
        notice_html = gr.HTML(f"<div class='notice'>{I18N['notice']['en']}</div>")

        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(label=I18N["input_image"]["en"], type="pil", height=420)
            with gr.Column(scale=1, elem_classes=["card"]):
                dropdown = gr.Dropdown(
                    label=I18N["dropdown_label"]["en"],
                    choices=build_dropdown_choices("en"),
                    value=CAMERA_OPTIONS[0]["cn"],
                    allow_custom_value=False,
                    interactive=True,
                )
                # Only shown while the custom option is selected (see _sync).
                custom_cn = gr.Textbox(
                    label=I18N["custom_cn_label"]["en"],
                    placeholder=I18N["custom_cn_ph"]["en"],
                    visible=False,
                    lines=2,
                )
                extra_prompt = gr.Textbox(
                    label=I18N["extra_label"]["en"],
                    placeholder=I18N["extra_ph"]["en"],
                    lines=2,
                )
                # Advanced-settings accordion
                with gr.Accordion(I18N["accordion"]["en"], open=False) as adv_acc:
                    seed = gr.Slider(label=I18N["seed"]["en"], minimum=0, maximum=MAX_SEED, step=1, value=DEFAULT_SEED)
                    randomize_seed = gr.Checkbox(label=I18N["rand"]["en"], value=DEFAULT_RANDOMIZE)
                    true_guidance_scale = gr.Slider(label=I18N["tgs"]["en"], minimum=1.0, maximum=10.0, step=0.1, value=DEFAULT_TRUE_GUIDANCE_SCALE)
                    num_inference_steps = gr.Slider(label=I18N["steps"]["en"], minimum=1, maximum=40, step=1, value=DEFAULT_NUM_INFERENCE_STEPS)
                run_button = gr.Button(I18N["run"]["en"], variant="primary")
                selected_cn = gr.Textbox(label=I18N["sel_cn"]["en"], value=CAMERA_OPTIONS[0]["cn"], interactive=False)
                final_prompt_preview = gr.Textbox(label=I18N["final_prev"]["en"], value="", interactive=False)

        # Refresh the previews on every selection/input change and toggle
        # the visibility of the custom prompt box.
        def _sync(v_cn, extra, custom_text):
            """Return (selected prompt, final prompt, custom-box visibility update)."""
            is_custom = v_cn == CUSTOM_OPTION_VALUE
            if is_custom:
                # None-safe, like the same lookup in generate_from_dropdown.
                base = (custom_text or "").strip()
            else:
                base = v_cn or CAMERA_OPTIONS[0]["cn"]
            final = _append_prompt(base, extra) if base else ""
            return base, final, gr.update(visible=is_custom)

        # The three inputs share one handler with identical wiring.
        for _component in (dropdown, extra_prompt, custom_cn):
            _component.change(
                fn=_sync,
                inputs=[dropdown, extra_prompt, custom_cn],
                outputs=[selected_cn, final_prompt_preview, custom_cn],
            )

        with gr.Row():
            with gr.Column(scale=1, elem_classes=["card"]):
                result_image = gr.Image(label=I18N["output"]["en"], type="pil", format="png", height=520, show_download_button=True)
                status_text = gr.Textbox(label=I18N["status"]["en"], interactive=False)
                sent_hdr_md = gr.Markdown(I18N["sent_hdr"]["en"])
                final_prompt_small = gr.Textbox(show_label=False, interactive=False, elem_classes=["preview", "small"])

    # Language switching
    def _switch_lang(lang, current_dropdown_value):
        """Return one update per output component, in the order of `outputs` below.

        Updates labels, headings and placeholders and rebuilds the dropdown
        choices for ``lang`` while keeping the current selection.
        """
        return (
            gr.update(label=I18N["lang_label"][lang]),  # lang_selector label
            I18N["title"][lang],  # title_md value
            gr.update(value=f"<div class='notice'>{I18N['notice'][lang]}</div>"),  # notice_html
            gr.update(label=I18N["input_image"][lang]),  # input_image label
            gr.update(
                label=I18N["dropdown_label"][lang],
                choices=build_dropdown_choices(lang),
                value=current_dropdown_value or CAMERA_OPTIONS[0]["cn"],
            ),  # dropdown
            gr.update(label=I18N["custom_cn_label"][lang], placeholder=I18N["custom_cn_ph"][lang]),  # custom_cn
            gr.update(label=I18N["extra_label"][lang], placeholder=I18N["extra_ph"][lang]),  # extra_prompt
            # The accordion title has translations in I18N but was never
            # refreshed on a language switch.
            gr.update(label=I18N["accordion"][lang]),  # adv_acc
            gr.update(label=I18N["seed"][lang]),  # seed
            gr.update(label=I18N["rand"][lang]),  # randomize_seed
            gr.update(label=I18N["tgs"][lang]),  # true_guidance_scale
            gr.update(label=I18N["steps"][lang]),  # num_inference_steps
            gr.update(value=I18N["run"][lang]),  # run_button text
            gr.update(label=I18N["sel_cn"][lang]),  # selected_cn
            gr.update(label=I18N["final_prev"][lang]),  # final_prompt_preview
            gr.update(label=I18N["output"][lang]),  # result_image
            gr.update(label=I18N["status"][lang]),  # status_text
            I18N["sent_hdr"][lang],  # sent_hdr_md
        )

    lang_selector.change(
        fn=_switch_lang,
        inputs=[lang_selector, dropdown],
        outputs=[
            lang_selector,  # label update
            title_md,  # markdown title
            notice_html,  # notice
            input_image,  # image label
            dropdown,  # dropdown (choices/label/value)
            custom_cn,  # custom label/placeholder
            extra_prompt,  # extra label/placeholder
            adv_acc,  # accordion label
            seed,  # seed label
            randomize_seed,  # randomize label
            true_guidance_scale,  # tgs label
            num_inference_steps,  # steps label
            run_button,  # button text
            selected_cn,  # label
            final_prompt_preview,  # label
            result_image,  # label
            status_text,  # label
            sent_hdr_md,  # markdown content
        ],
    )

    # Run: pass the accordion values and the UI language along as well.
    run_button.click(
        fn=generate_from_dropdown,
        inputs=[input_image, dropdown, custom_cn, extra_prompt, seed, randomize_seed, true_guidance_scale, num_inference_steps, lang_selector],
        outputs=[result_image, status_text, final_prompt_small],
    )

if __name__ == "__main__":
    demo.launch()