# Flux-VIDEO / app.py
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw
from gradio_client import Client, handle_file
import random
import tempfile
import os
import logging
import torch
from diffusers import AutoencoderKL, TCDScheduler
from diffusers.models.model_loading_utils import load_state_dict
from huggingface_hub import hf_hub_download
# Hugging Face Spaces GPU decorator (optional dependency)
try:
    import spaces
except ImportError:
    # Dummy no-op decorator for environments without the Spaces GPU runtime
    class spaces:
        @staticmethod
        def GPU(duration=None):
            def decorator(func):
                return func
            return decorator
# Environment variables
os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
# MMAudio imports (optional dependency)
try:
import mmaudio
from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
setup_eval_logging)
from mmaudio.model.flow_matching import FlowMatching
from mmaudio.model.networks import MMAudio, get_my_mmaudio
from mmaudio.model.sequence_config import SequenceConfig
from mmaudio.model.utils.features_utils import FeaturesUtils
MMAUDIO_AVAILABLE = True
except ImportError:
MMAUDIO_AVAILABLE = False
logging.warning("MMAudio not available. Sound generation will be disabled.")
# Load the ControlNet outpainting models
try:
from controlnet_union import ControlNetModel_Union
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
    # ControlNet config and weights
config_file = hf_hub_download(
"xinsir/controlnet-union-sdxl-1.0",
filename="config_promax.json",
)
config = ControlNetModel_Union.load_config(config_file)
controlnet_model = ControlNetModel_Union.from_config(config)
model_file = hf_hub_download(
"xinsir/controlnet-union-sdxl-1.0",
filename="diffusion_pytorch_model_promax.safetensors",
)
state_dict = load_state_dict(model_file)
loaded_keys = list(state_dict.keys())
result = ControlNetModel_Union._load_pretrained_model(
controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
)
model = result[0]
model = model.to(device="cuda", dtype=torch.float16)
    # Load the fp16-fixed SDXL VAE
vae = AutoencoderKL.from_pretrained(
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
).to("cuda")
# νŒŒμ΄ν”„λΌμΈ λ‘œλ“œ
pipe = StableDiffusionXLFillPipeline.from_pretrained(
"SG161222/RealVisXL_V5.0_Lightning",
torch_dtype=torch.float16,
vae=vae,
controlnet=model,
variant="fp16",
).to("cuda")
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
OUTPAINT_MODEL_LOADED = True
except Exception as e:
logging.error(f"Failed to load outpainting models: {str(e)}")
OUTPAINT_MODEL_LOADED = False
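# Note: the outpainting stack above is loaded eagerly onto CUDA at import time.
# On a CPU-only machine the .to("cuda") calls raise, the except block logs the
# error, and OUTPAINT_MODEL_LOADED stays False so the UI degrades gracefully.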
# MMAudio model setup and loading
if MMAUDIO_AVAILABLE:
try:
        # CUDA settings
if torch.cuda.is_available():
device = torch.device("cuda")
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.benchmark = True
else:
device = torch.device("cpu")
dtype = torch.bfloat16
# λͺ¨λΈ μ„€μ •
model_cfg: ModelConfig = all_model_cfg['large_44k_v2']
model_cfg.download_if_needed()
setup_eval_logging()
# λͺ¨λΈ λ‘œλ“œ
def get_mmaudio_model():
with torch.cuda.device(device):
seq_cfg = model_cfg.seq_cfg
net: MMAudio = get_my_mmaudio(model_cfg.model_name).to(device, dtype).eval()
net.load_weights(torch.load(model_cfg.model_path, map_location=device, weights_only=True))
logging.info(f'Loaded MMAudio weights from {model_cfg.model_path}')
feature_utils = FeaturesUtils(
tod_vae_ckpt=model_cfg.vae_path,
synchformer_ckpt=model_cfg.synchformer_ckpt,
enable_conditions=True,
mode=model_cfg.mode,
bigvgan_vocoder_ckpt=model_cfg.bigvgan_16k_path,
need_vae_encoder=False
).to(device, dtype).eval()
return net, feature_utils, seq_cfg
mmaudio_net, mmaudio_feature_utils, mmaudio_seq_cfg = get_mmaudio_model()
MMAUDIO_LOADED = True
except Exception as e:
logging.error(f"Failed to load MMAudio models: {str(e)}")
MMAUDIO_LOADED = False
else:
MMAUDIO_LOADED = False
# API URLs
TEXT2IMG_API_URL = "http://211.233.58.201:7896"
VIDEO_API_URL = "http://211.233.58.201:7875"
# Logging setup
logging.basicConfig(level=logging.INFO)
# Image size presets
IMAGE_PRESETS = {
    "Custom": {"width": 1024, "height": 1024},
    "1:1 Square": {"width": 1024, "height": 1024},
    "4:3 Standard": {"width": 1024, "height": 768},
    "16:9 Widescreen": {"width": 1024, "height": 576},
    "9:16 Portrait": {"width": 576, "height": 1024},
    "6:19 Special Portrait": {"width": 324, "height": 1024},
    "Instagram Square": {"width": 1080, "height": 1080},
    "Instagram Story": {"width": 1080, "height": 1920},
    "Instagram Landscape": {"width": 1080, "height": 566},
    "Facebook Cover": {"width": 820, "height": 312},
    "Twitter Header": {"width": 1500, "height": 500},
    "YouTube Thumbnail": {"width": 1280, "height": 720},
    "LinkedIn Banner": {"width": 1584, "height": 396},
}
def update_dimensions(preset):
if preset in IMAGE_PRESETS:
return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"]
return 1024, 1024
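# Example: update_dimensions("16:9 Widescreen") returns (1024, 576);
# unknown preset names fall back to (1024, 1024).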
def generate_text_to_image(prompt, width, height, guidance, inference_steps, seed):
if not prompt:
return None, "ν”„λ‘¬ν”„νŠΈλ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”"
try:
client = Client(TEXT2IMG_API_URL)
if seed == -1:
seed = random.randint(0, 9999999)
result = client.predict(
prompt=prompt,
width=int(width),
height=int(height),
guidance=float(guidance),
inference_steps=int(inference_steps),
seed=int(seed),
do_img2img=False,
init_image=None,
image2image_strength=0.8,
resize_img=True,
api_name="/generate_image"
)
        return result[0], f"Seed used: {result[1]}"
except Exception as e:
logging.error(f"Image generation error: {str(e)}")
return None, f"였λ₯˜: {str(e)}"
@spaces.GPU(duration=60)
@torch.inference_mode()
def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_steps=25, cfg_strength=4.5, duration=8.0):
"""λΉ„λ””μ˜€μ— μ‚¬μš΄λ“œλ₯Ό μΆ”κ°€ν•˜λŠ” ν•¨μˆ˜"""
if not MMAUDIO_LOADED:
logging.error("MMAudio model not loaded")
return video_path
try:
rng = torch.Generator(device=device)
rng.manual_seed(seed)
fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
        # Load video frames
clip_frames, sync_frames, actual_duration = load_video(video_path, duration)
clip_frames = clip_frames.unsqueeze(0)
sync_frames = sync_frames.unsqueeze(0)
mmaudio_seq_cfg.duration = actual_duration
mmaudio_net.update_seq_lengths(mmaudio_seq_cfg.latent_seq_len, mmaudio_seq_cfg.clip_seq_len, mmaudio_seq_cfg.sync_seq_len)
        # Generate audio
audios = generate(clip_frames,
sync_frames, [prompt],
negative_text=[negative_prompt],
feature_utils=mmaudio_feature_utils,
net=mmaudio_net,
fm=fm,
rng=rng,
cfg_strength=cfg_strength)
audio = audios.float().cpu()[0]
        # Mux the audio into the video
video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
make_video(video_path,
video_save_path,
audio,
sampling_rate=mmaudio_seq_cfg.sampling_rate,
duration_sec=mmaudio_seq_cfg.duration)
return video_save_path
except Exception as e:
logging.error(f"Video to audio error: {str(e)}")
return video_path
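# Direct-invocation sketch (assumes the MMAudio weights loaded successfully):
#   out = video_to_audio("clip.mp4", prompt="rain on leaves",
#                        negative_prompt="music", seed=42, duration=8.0)
# num_steps and cfg_strength trade prompt adherence against runtime; on any
# failure the function falls back to returning the silent input video.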
def generate_video_from_image(image, prompt="", length=4.0, sound_generation="No sound", sound_prompt="", sound_negative_prompt="music"):
if image is None:
return None
try:
        # Save the input image to a temp file
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp:
temp_path = fp.name
Image.fromarray(image).save(temp_path)
        # Call the video generation API
client = Client(VIDEO_API_URL)
result = client.predict(
input_image=handle_file(temp_path),
prompt=prompt if prompt else "Generate natural motion",
n_prompt="",
seed=random.randint(0, 9999999),
use_teacache=True,
video_length=float(length),
api_name="/process"
)
os.unlink(temp_path)
if result and len(result) > 0:
video_dict = result[0]
video_path = video_dict.get("video") if isinstance(video_dict, dict) else None
            # If sound generation was requested
            if video_path and sound_generation == "Generate sound" and MMAUDIO_LOADED:
                # Fall back to the video prompt if no sound prompt was given
                if not sound_prompt:
                    sound_prompt = prompt if prompt else "ambient sound"
                # Add sound to the video
video_with_sound = video_to_audio(
video_path,
sound_prompt,
sound_negative_prompt,
duration=length
)
return video_with_sound
return video_path
except Exception as e:
logging.error(f"Video generation error: {str(e)}")
return None
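# Full chain: numpy image -> temp PNG -> remote /process endpoint -> mp4 path,
# optionally piped through video_to_audio when "Generate sound" is selected.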
def prepare_image_and_mask(image, width, height, overlap_percentage, alignment):
"""이미지와 마슀크λ₯Ό μ€€λΉ„ν•˜λŠ” ν•¨μˆ˜"""
if image is None:
return None, None
    # Convert to a PIL image
if isinstance(image, np.ndarray):
image = Image.fromarray(image).convert('RGB')
target_size = (width, height)
    # Scale the image to fit the target size
scale_factor = min(target_size[0] / image.width, target_size[1] / image.height)
new_width = int(image.width * scale_factor)
new_height = int(image.height * scale_factor)
# 이미지 λ¦¬μ‚¬μ΄μ¦ˆ
source = image.resize((new_width, new_height), Image.LANCZOS)
    # Compute the overlap band
overlap_x = int(new_width * (overlap_percentage / 100))
overlap_y = int(new_height * (overlap_percentage / 100))
overlap_x = max(overlap_x, 1)
overlap_y = max(overlap_y, 1)
    # Margins depend on alignment
    if alignment == "Center":
        margin_x = (target_size[0] - new_width) // 2
        margin_y = (target_size[1] - new_height) // 2
    elif alignment == "Left":
        margin_x = 0
        margin_y = (target_size[1] - new_height) // 2
    elif alignment == "Right":
        margin_x = target_size[0] - new_width
        margin_y = (target_size[1] - new_height) // 2
    elif alignment == "Top":
        margin_x = (target_size[0] - new_width) // 2
        margin_y = 0
    elif alignment == "Bottom":
        margin_x = (target_size[0] - new_width) // 2
        margin_y = target_size[1] - new_height
    else:
        # Fall back to center for unknown values instead of raising UnboundLocalError
        margin_x = (target_size[0] - new_width) // 2
        margin_y = (target_size[1] - new_height) // 2
    # Create the background canvas
background = Image.new('RGB', target_size, (255, 255, 255))
background.paste(source, (margin_x, margin_y))
    # Create the mask (255 = area to generate)
mask = Image.new('L', target_size, 255)
mask_draw = ImageDraw.Draw(mask)
# 마슀크 μ˜μ—­ 그리기
white_gaps_patch = 2
left_overlap = margin_x + overlap_x if alignment != "μ™Όμͺ½" else margin_x
right_overlap = margin_x + new_width - overlap_x if alignment != "였λ₯Έμͺ½" else margin_x + new_width
top_overlap = margin_y + overlap_y if alignment != "μœ„" else margin_y
bottom_overlap = margin_y + new_height - overlap_y if alignment != "μ•„λž˜" else margin_y + new_height
mask_draw.rectangle([
(left_overlap, top_overlap),
(right_overlap, bottom_overlap)
], fill=0)
return background, mask
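# Mask semantics: 255 (white) marks pixels for the model to generate, 0 (black)
# preserves the source; the overlap band stays white so the model can blend edges.
# Minimal geometry sketch, assuming a 512x512 source and a 1024x576 target:
#   bg, mask = prepare_image_and_mask(np.zeros((512, 512, 3), np.uint8),
#                                     1024, 576, overlap_percentage=10,
#                                     alignment="Center")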
@spaces.GPU(duration=24)
def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
"""이미지 μ•„μ›ƒνŽ˜μΈνŒ… μ‹€ν–‰"""
if image is None:
return None
if not OUTPAINT_MODEL_LOADED:
return Image.new('RGB', (width, height), (200, 200, 200))
try:
        # Prepare the image and mask
background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment)
if background is None:
return None
        # Build cnet_image (mask area painted black)
cnet_image = background.copy()
cnet_image.paste(0, (0, 0), mask)
        # Build the prompt
final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k"
# GPUμ—μ„œ μ‹€ν–‰
with torch.autocast(device_type="cuda", dtype=torch.float16):
(
prompt_embeds,
negative_prompt_embeds,
pooled_prompt_embeds,
negative_pooled_prompt_embeds,
) = pipe.encode_prompt(final_prompt, "cuda", True)
            # Generation loop (the pipeline yields intermediate images)
for generated_image in pipe(
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
image=cnet_image,
num_inference_steps=num_steps
):
                # Intermediate result (unused here)
pass
            # Final image from the last yield
final_image = generated_image
        # Convert to RGBA and paste through the mask
final_image = final_image.convert("RGBA")
cnet_image.paste(final_image, (0, 0), mask)
return cnet_image
except Exception as e:
logging.error(f"Outpainting error: {str(e)}")
return background if 'background' in locals() else None
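# The fill pipeline is used here as a generator: each iteration yields a
# progressively refined image, the loop drains it, and only the final yield is
# composited back over the original pixels through the mask.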
# CSS
css = """
:root {
--primary-color: #f8c3cd;
--secondary-color: #b3e5fc;
--background-color: #f5f5f7;
--card-background: #ffffff;
--text-color: #424242;
--accent-color: #ffb6c1;
--success-color: #c8e6c9;
--warning-color: #fff9c4;
--shadow-color: rgba(0, 0, 0, 0.1);
--border-radius: 12px;
}
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
}
.panel-box {
border-radius: var(--border-radius) !important;
box-shadow: 0 8px 16px var(--shadow-color) !important;
background-color: var(--card-background) !important;
padding: 20px !important;
margin-bottom: 20px !important;
}
#generate-btn, #video-btn, #outpaint-btn {
background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
font-size: 1.1rem !important;
padding: 12px 24px !important;
margin-top: 10px !important;
width: 100% !important;
}
.tabitem {
min-height: 700px !important;
}
"""
# Gradio Interface
demo = gr.Blocks(css=css, title="AI Image & Video Generator")
with demo:
gr.Markdown("# 🎨 Ginigen μŠ€νŠœλ””μ˜€")
with gr.Tabs() as tabs:
        # Tab 1: text -> image -> video
        with gr.Tab("Text → Image → Video", elem_classes="tabitem"):
with gr.Row(equal_height=True):
                # Input column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 📝 Image Generation Settings")
                        prompt = gr.Textbox(
                            label="Prompt (Korean/English supported)",
                            placeholder="Describe the image you want to generate...",
                            lines=3
                        )
                        size_preset = gr.Dropdown(
                            choices=list(IMAGE_PRESETS.keys()),
                            value="1:1 Square",
                            label="Size preset"
                        )
                        with gr.Row():
                            width = gr.Slider(256, 2048, 1024, step=64, label="Width")
                            height = gr.Slider(256, 2048, 1024, step=64, label="Height")
                        with gr.Row():
                            guidance = gr.Slider(1.0, 20.0, 3.5, step=0.1, label="Guidance")
                            steps = gr.Slider(1, 50, 30, step=1, label="Steps")
                        seed = gr.Number(label="Seed (-1 = random)", value=-1)
                        generate_btn = gr.Button("🎨 Generate Image", variant="primary", elem_id="generate-btn")
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎬 Video Generation Settings")
                        video_prompt = gr.Textbox(
                            label="(Optional) Video prompt (in English)",
                            placeholder="Describe the motion in the video... (leave empty for default motion)",
                            lines=2
                        )
                        video_length = gr.Slider(
                            minimum=1,
                            maximum=60,
                            value=4,
                            step=0.5,
                            label="Video length (seconds)",
                            info="Choose from 1 to 60 seconds"
                        )
                        # Sound generation option
                        sound_generation = gr.Radio(
                            choices=["No sound", "Generate sound"],
                            value="No sound",
                            label="Sound option",
                            info="Choose whether to add sound to the video"
                        )
                        # Sound input fields (shown conditionally)
                        with gr.Column(visible=False) as sound_options:
                            sound_prompt = gr.Textbox(
                                label="Sound prompt (optional)",
                                placeholder="Describe the sound to generate... (leave empty to use the video prompt)",
                                lines=2
                            )
                            sound_negative_prompt = gr.Textbox(
                                label="Sound negative prompt",
                                value="music",
                                lines=1
                            )
                        video_btn = gr.Button("🎬 Convert to Video", variant="secondary", elem_id="video-btn")
                # Output column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🖼️ Results")
                        output_image = gr.Image(label="Generated image", type="numpy")
                        output_seed = gr.Textbox(label="Seed info")
                        output_video = gr.Video(label="Generated video")
        # Tab 2: image outpainting
        with gr.Tab("Image Aspect Ratio Change / Outpainting", elem_classes="tabitem"):
with gr.Row(equal_height=True):
                # Input column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🖼️ Image Upload")
                        input_image = gr.Image(
                            label="Source image",
                            type="numpy"
                        )
                        outpaint_prompt = gr.Textbox(
                            label="Prompt (optional)",
                            placeholder="Description of the area to extend...",
                            lines=2
                        )
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### ⚙️ Outpainting Settings")
                        outpaint_size_preset = gr.Dropdown(
                            choices=list(IMAGE_PRESETS.keys()),
                            value="16:9 Widescreen",
                            label="Target size preset"
                        )
                        with gr.Row():
                            outpaint_width = gr.Slider(256, 2048, 1280, step=64, label="Target width")
                            outpaint_height = gr.Slider(256, 2048, 720, step=64, label="Target height")
                        alignment = gr.Dropdown(
                            choices=["Center", "Left", "Right", "Top", "Bottom"],
                            value="Center",
                            label="Alignment"
                        )
                        overlap_percentage = gr.Slider(
                            minimum=1,
                            maximum=50,
                            value=10,
                            step=1,
                            label="Mask overlap (%)"
                        )
                        outpaint_steps = gr.Slider(
                            minimum=4,
                            maximum=12,
                            value=8,
                            step=1,
                            label="Inference steps"
                        )
                        outpaint_btn = gr.Button("🎨 Run Outpainting", variant="primary", elem_id="outpaint-btn")
                # Output column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🖼️ Result")
                        outpaint_result = gr.Image(label="Outpainting result")
    # Event wiring: first tab
size_preset.change(update_dimensions, [size_preset], [width, height])
generate_btn.click(
generate_text_to_image,
[prompt, width, height, guidance, steps, seed],
[output_image, output_seed]
)
    # Show/hide the sound options
    def toggle_sound_options(choice):
        return gr.update(visible=(choice == "Generate sound"))
sound_generation.change(
toggle_sound_options,
[sound_generation],
[sound_options]
)
video_btn.click(
generate_video_from_image,
[output_image, video_prompt, video_length, sound_generation, sound_prompt, sound_negative_prompt],
[output_video]
)
    # Event wiring: second tab
outpaint_size_preset.change(update_dimensions, [outpaint_size_preset], [outpaint_width, outpaint_height])
outpaint_btn.click(
outpaint_image,
[input_image, outpaint_prompt, outpaint_width, outpaint_height, overlap_percentage, alignment, outpaint_steps],
[outpaint_result]
)
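    # Note: video and outpainting jobs can run for minutes; calling demo.queue()
    # before launch() is a common way to avoid Gradio request timeouts.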
demo.launch()