|
|
|
|
|
|
|
|
import sys |
|
|
import os |
|
|
import time |
|
|
import glob |
|
|
import gc |
|
|
import torch |
|
|
import subprocess |
|
|
import random |
|
|
import argparse |
|
|
import shutil |
|
|
from typing import Sequence, Mapping, Any, Union |
|
|
|
|
|
|
|
|
|
|
|
def to_bool(s: str) -> bool:
    """Interpret a command-line style flag value as a boolean.

    The comparison is case-insensitive and ignores surrounding whitespace,
    so values such as ``"On"``, ``" true "`` or ``"YES\n"`` are all truthy.

    Args:
        s: The text to interpret. An actual ``bool`` is also accepted and is
            returned unchanged.

    Returns:
        True when the normalized text is one of ``true``, ``1``, ``t``,
        ``y``, ``yes`` or ``on``; False for any other text.
    """
    if isinstance(s, bool):
        return s
    truthy = ('true', '1', 't', 'y', 'yes', 'on')
    # Strip first: values coming from shells or notebooks often carry a
    # trailing newline or padding that would otherwise read as "false".
    return s.strip().lower() in truthy
|
|
|
|
|
def clear_memory():
    """Release unused Python objects and cached CUDA memory.

    The garbage collector runs first so that objects it frees (for example
    tensors that were only kept alive by reference cycles) have already
    returned their blocks to the CUDA caching allocator by the time
    ``torch.cuda.empty_cache()`` hands the unused cache back to the driver.
    The CUDA calls are skipped when no CUDA device is available.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
|
|
|
|
|
# Root directory of the ComfyUI checkout. Defaults to the Colab location; set
# the COMFYUI_BASE_PATH environment variable to run against another install.
COMFYUI_BASE_PATH = os.environ.get('COMFYUI_BASE_PATH') or '/content/ComfyUI'
|
|
|
|
|
def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    """Safely fetch one value from a ComfyUI node output.

    ``obj[index]`` is tried first. If that raises ``KeyError`` or
    ``TypeError`` and ``obj`` is a dict carrying a ``"result"`` entry, the
    value is taken from ``obj["result"][index]`` instead.

    Args:
        obj: The node output, either directly indexable or a dict with a
            ``"result"`` entry.
        index: Position (or key) of the wanted value.

    Returns:
        The value found at ``index``.

    Raises:
        KeyError, TypeError: Re-raised when direct indexing fails and there
            is no ``"result"`` entry to fall back to.
    """
    try:
        value = obj[index]
    except (KeyError, TypeError):
        wraps_result = isinstance(obj, dict) and "result" in obj
        if not wraps_result:
            raise
        value = obj["result"][index]
    return value
|
|
|
|
|
def add_comfyui_directory_to_sys_path() -> None:
    """Append the ComfyUI directory to ``sys.path``.

    Nothing happens when ``COMFYUI_BASE_PATH`` is not an existing directory
    or is already listed in ``sys.path``; otherwise the path is appended and
    a confirmation line is printed.
    """
    if not os.path.isdir(COMFYUI_BASE_PATH):
        return
    if COMFYUI_BASE_PATH in sys.path:
        return

    sys.path.append(COMFYUI_BASE_PATH)
    print(f"'{COMFYUI_BASE_PATH}' added to sys.path")
|
|
|
|
|
def import_custom_nodes() -> None:
    """Initialize the async environment needed to load ComfyUI custom nodes.

    (Same as ``import_custom_nodes`` in the I2V script.)

    Steps, in order:
      1. Apply ``nest_asyncio``; if it cannot be imported, pip-install it and
         retry. A failure here is printed and otherwise ignored.
      2. Obtain the current event loop, replacing it with a fresh one when it
         is closed or when ``asyncio.get_event_loop()`` raises RuntimeError.
      3. Create ``server.PromptServer`` and ``execution.PromptQueue``.
      4. Run ``init_extra_nodes()`` to completion when the loop is idle;
         otherwise (or if that raises RuntimeError) schedule it with
         ``asyncio.ensure_future``.
    """
    try:
        import nest_asyncio

        nest_asyncio.apply()
    except ImportError:
        print("nest_asyncio not found, installing...")
        pip_command = [sys.executable, "-m", "pip", "install", "-q", "nest_asyncio"]
        try:
            subprocess.run(pip_command, check=True)
            import nest_asyncio

            nest_asyncio.apply()
            print("nest_asyncio installed and applied.")
        except Exception as install_error:
            print(f"Failed to install or apply nest_asyncio: {install_error}")

    # `execution`, `server` and `nodes` are not standard-library modules;
    # presumably they resolve from the ComfyUI directory on sys.path.
    import asyncio
    import execution
    import server
    from nodes import init_extra_nodes

    def _install_fresh_loop():
        # Create a new event loop and make it the current one.
        fresh = asyncio.new_event_loop()
        asyncio.set_event_loop(fresh)
        return fresh

    try:
        loop = asyncio.get_event_loop()
        if loop.is_closed():
            loop = _install_fresh_loop()
    except RuntimeError:
        loop = _install_fresh_loop()

    server_instance = server.PromptServer(loop)
    execution.PromptQueue(server_instance)

    if loop.is_running():
        # A running loop cannot be driven synchronously; only schedule the work.
        try:
            asyncio.ensure_future(init_extra_nodes())
        except Exception as schedule_error:
            print(f"Error trying async init_extra_nodes on running loop: {schedule_error}")
        return

    try:
        loop.run_until_complete(init_extra_nodes())
    except RuntimeError as run_error:
        print(f"Note: Could not run init_extra_nodes synchronously: {run_error}")
        try:
            asyncio.ensure_future(init_extra_nodes())
        except Exception as schedule_error:
            print(f"Error trying async init_extra_nodes: {schedule_error}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_args(argv: Union[Sequence[str], None] = None) -> argparse.Namespace:
    """Parse the command-line options of the V2A generation script.

    Args:
        argv: Argument strings to parse. When None (the default) argparse
            reads ``sys.argv[1:]``, which is what calling ``parse_args()``
            with no arguments has always done.

    Returns:
        The parsed options. On/off style options (``--mask_away_clip``,
        ``--force_offload``, ``--save_metadata``, ``--trim_to_audio``,
        ``--pingpong``) are kept as strings here.
    """
    parser = argparse.ArgumentParser(description="ComfyUI V2A (Video-to-Audio) Generation Script")

    # Input video and prompts.
    parser.add_argument("--input_video_path", type=str, required=True, help="오디오를 생성할 입력 비디오 파일 경로")
    parser.add_argument("--prompt", type=str, default="")
    parser.add_argument("--negative_prompt", type=str, default="")

    # Sampler settings.
    parser.add_argument("--steps", type=int, default=25)
    parser.add_argument("--cfg", type=float, default=4.5)
    parser.add_argument("--seed", type=int, default=-1)
    parser.add_argument("--mask_away_clip", type=str, default="off")
    parser.add_argument("--force_offload", type=str, default="off")

    # Model files and precision.
    parser.add_argument("--mmaudio_model", type=str, default="mmaudio_large_44k_v2_fp16.safetensors")
    parser.add_argument("--base_precision", type=str, default="fp16")
    parser.add_argument("--vae_model", type=str, default="mmaudio_vae_44k_fp16.safetensors")
    parser.add_argument("--synchformer_model", type=str, default="mmaudio_synchformer_fp16.safetensors")
    parser.add_argument("--clip_model", type=str, default="apple_DFN5B-CLIP-ViT-H-14-384_fp16.safetensors")
    parser.add_argument("--mode", type=str, default="44k")
    parser.add_argument("--precision", type=str, default="fp16", help="Feature Utils Precision")

    # Video loading.
    parser.add_argument("--force_rate", type=int, default=0)
    parser.add_argument("--custom_width", type=int, default=0)
    parser.add_argument("--custom_height", type=int, default=0)
    parser.add_argument("--frame_load_cap", type=int, default=0)
    parser.add_argument("--skip_first_frames", type=int, default=0)
    parser.add_argument("--select_every_nth", type=int, default=1)
    parser.add_argument("--load_format", type=str, default="AnimateDiff")

    # Video combine / output.
    parser.add_argument("--loop_count", type=int, default=0)
    parser.add_argument("--filename_prefix", type=str, default="MMaudio")
    parser.add_argument("--combine_format", type=str, default="video/h264-mp4")
    parser.add_argument("--pix_fmt", type=str, default="yuv420p")
    parser.add_argument("--crf", type=int, default=19)
    parser.add_argument("--save_metadata", type=str, default="on")
    parser.add_argument("--trim_to_audio", type=str, default="off")
    parser.add_argument("--pingpong", type=str, default="off")

    return parser.parse_args(argv)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(): |
|
|
args = parse_args() |
|
|
print("๐ V2A ์ค๋์ค ์์ฑ์ ์์ํฉ๋๋ค (VRAM Optimized)...") |
|
|
|
|
|
|
|
|
add_comfyui_directory_to_sys_path() |
|
|
|
|
|
try: |
|
|
from utils.extra_config import load_extra_path_config |
|
|
except ImportError: |
|
|
print("โ ๏ธ ComfyUI์ extra_model_paths.yaml ๋ก๋ฉ ์คํจ (๋ฌด์ํ๊ณ ์งํ)") |
|
|
load_extra_path_config = lambda x: None |
|
|
|
|
|
extra_model_paths_file = os.path.join(COMFYUI_BASE_PATH, "extra_model_paths.yaml") |
|
|
if os.path.exists(extra_model_paths_file): |
|
|
load_extra_path_config(extra_model_paths_file) |
|
|
|
|
|
print("ComfyUI ์ปค์คํ
๋
ธ๋ ์ด๊ธฐํ ์ค...") |
|
|
import_custom_nodes() |
|
|
from nodes import NODE_CLASS_MAPPINGS |
|
|
print("์ปค์คํ
๋
ธ๋ ์ด๊ธฐํ ์๋ฃ.") |
|
|
|
|
|
|
|
|
mmaudiomodelloader = NODE_CLASS_MAPPINGS["MMAudioModelLoader"]() |
|
|
vhs_loadvideo = NODE_CLASS_MAPPINGS["VHS_LoadVideo"]() |
|
|
mmaudiofeatureutilsloader = NODE_CLASS_MAPPINGS["MMAudioFeatureUtilsLoader"]() |
|
|
vhs_videoinfo = NODE_CLASS_MAPPINGS["VHS_VideoInfo"]() |
|
|
mmaudiosampler = NODE_CLASS_MAPPINGS["MMAudioSampler"]() |
|
|
vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]() |
|
|
|
|
|
|
|
|
if args.seed == -1: |
|
|
final_seed = random.randint(1, 2**64) |
|
|
print(f" - ๋๋ค ์๋ ์์ฑ: {final_seed}") |
|
|
else: |
|
|
final_seed = args.seed |
|
|
print(f" - ๊ณ ์ ์๋ ์ฌ์ฉ: {final_seed}") |
|
|
|
|
|
|
|
|
with torch.inference_mode(): |
|
|
|
|
|
|
|
|
print(f"\n1๋จ๊ณ: ์ค๋์ค ์์ฑ์ ์ํ ๋น๋์ค ๋ก๋ (25 FPS ๊ฐ์ )... ({args.input_video_path})") |
|
|
vhs_loadvideo_91_audio = vhs_loadvideo.load_video( |
|
|
video=args.input_video_path, |
|
|
force_rate=25, |
|
|
custom_width=args.custom_width, |
|
|
custom_height=args.custom_height, |
|
|
frame_load_cap=args.frame_load_cap, |
|
|
skip_first_frames=args.skip_first_frames, |
|
|
select_every_nth=args.select_every_nth, |
|
|
format=args.load_format, |
|
|
unique_id=random.randint(1, 2**64) |
|
|
) |
|
|
images_for_audio = get_value_at_index(vhs_loadvideo_91_audio, 0) |
|
|
|
|
|
|
|
|
vhs_videoinfo_105 = vhs_videoinfo.get_video_info( |
|
|
video_info=get_value_at_index(vhs_loadvideo_91_audio, 3) |
|
|
) |
|
|
del vhs_loadvideo_91_audio |
|
|
|
|
|
duration = get_value_at_index(vhs_videoinfo_105, 7) |
|
|
original_frame_rate = get_value_at_index(vhs_videoinfo_105, 0) |
|
|
print(f" - ๋น๋์ค ์ ๋ณด: {duration}์ด, ์๋ณธ {original_frame_rate} FPS") |
|
|
clear_memory() |
|
|
|
|
|
|
|
|
print(f"\n2๋จ๊ณ: ์ค๋์ค ๋ชจ๋ธ ๋ก๋ฉ ์ค...") |
|
|
print(f" - MMAudio ๋ชจ๋ธ: {args.mmaudio_model} ({args.base_precision})") |
|
|
mmaudiomodelloader_85 = mmaudiomodelloader.loadmodel( |
|
|
mmaudio_model=args.mmaudio_model, |
|
|
base_precision=args.base_precision |
|
|
) |
|
|
mmaudio_model = get_value_at_index(mmaudiomodelloader_85, 0) |
|
|
|
|
|
print(f" - ์ ํธ๋ฆฌํฐ ๋ชจ๋ธ: (Mode: {args.mode}, Precision: {args.precision})") |
|
|
mmaudiofeatureutilsloader_102 = mmaudiofeatureutilsloader.loadmodel( |
|
|
vae_model=args.vae_model, |
|
|
synchformer_model=args.synchformer_model, |
|
|
clip_model=args.clip_model, |
|
|
mode=args.mode, |
|
|
precision=args.precision |
|
|
) |
|
|
feature_utils = get_value_at_index(mmaudiofeatureutilsloader_102, 0) |
|
|
|
|
|
|
|
|
print(f"\n3๋จ๊ณ: ์ค๋์ค ์์ฑ ์ค... (Steps: {args.steps}, CFG: {args.cfg})") |
|
|
mmaudiosampler_92 = mmaudiosampler.sample( |
|
|
duration=duration, |
|
|
steps=args.steps, |
|
|
cfg=args.cfg, |
|
|
seed=final_seed, |
|
|
prompt=args.prompt, |
|
|
negative_prompt=args.negative_prompt, |
|
|
mask_away_clip=to_bool(args.mask_away_clip), |
|
|
force_offload=to_bool(args.force_offload), |
|
|
mmaudio_model=mmaudio_model, |
|
|
feature_utils=feature_utils, |
|
|
images=images_for_audio |
|
|
) |
|
|
generated_audio = get_value_at_index(mmaudiosampler_92, 0) |
|
|
|
|
|
|
|
|
print(f"\n4๋จ๊ณ: ๋ชจ๋ธ ๋ฐ ์ค๋์ค์ฉ ์ด๋ฏธ์ง ๋ฉ๋ชจ๋ฆฌ ํด์ ์ค...") |
|
|
del mmaudiomodelloader_85, mmaudio_model, mmaudiofeatureutilsloader_102, feature_utils |
|
|
del images_for_audio |
|
|
clear_memory() |
|
|
|
|
|
|
|
|
print(f"\n5๋จ๊ณ: ๋น๋์ค ๊ฒฐํฉ์ ์ํ ์๋ณธ ๋น๋์ค ๋ก๋ (์ฌ์ฉ์ ์ค์ FPS: {args.force_rate})...") |
|
|
vhs_loadvideo_91_combine = vhs_loadvideo.load_video( |
|
|
video=args.input_video_path, |
|
|
force_rate=args.force_rate, |
|
|
custom_width=args.custom_width, |
|
|
custom_height=args.custom_height, |
|
|
frame_load_cap=args.frame_load_cap, |
|
|
skip_first_frames=args.skip_first_frames, |
|
|
select_every_nth=args.select_every_nth, |
|
|
format=args.load_format, |
|
|
unique_id=random.randint(1, 2**64) |
|
|
) |
|
|
images_for_combine = get_value_at_index(vhs_loadvideo_91_combine, 0) |
|
|
del vhs_loadvideo_91_combine |
|
|
clear_memory() |
|
|
|
|
|
|
|
|
print(f"\n6๋จ๊ณ: ๋น๋์ค + ์ค๋์ค ๊ฒฐํฉ ๋ฐ ์ ์ฅ ์ค...") |
|
|
timestamp = time.strftime("%Y%m%d-%H%M%S") |
|
|
final_filename_prefix = f"{args.filename_prefix}_{timestamp}" |
|
|
|
|
|
vhs_videocombine_97 = vhs_videocombine.combine_video( |
|
|
frame_rate=original_frame_rate, |
|
|
loop_count=args.loop_count, |
|
|
filename_prefix=final_filename_prefix, |
|
|
format=args.combine_format, |
|
|
pix_fmt=args.pix_fmt, |
|
|
crf=args.crf, |
|
|
save_metadata=to_bool(args.save_metadata), |
|
|
trim_to_audio=to_bool(args.trim_to_audio), |
|
|
pingpong=to_bool(args.pingpong), |
|
|
save_output=True, |
|
|
images=images_for_combine, |
|
|
audio=generated_audio, |
|
|
unique_id=random.randint(1, 2**64) |
|
|
) |
|
|
|
|
|
|
|
|
del images_for_combine, generated_audio |
|
|
clear_memory() |
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
|
|
|
file_path_list = vhs_videocombine_97['result'][0][1] |
|
|
final_video_path = file_path_list[2] |
|
|
except Exception as e: |
|
|
print(f"โ [์ค๋ฅ] ์ต์ข
ํ์ผ ๊ฒฝ๋ก๋ฅผ ์ถ์ถํ๋ ๋ฐ ์คํจํ์ต๋๋ค: {e}") |
|
|
print(f" - ์ ์ฒด ๋ฐํ๊ฐ: {vhs_videocombine_97}") |
|
|
final_video_path = None |
|
|
|
|
|
if final_video_path and os.path.exists(final_video_path): |
|
|
print(f"โ
์ค๋์ค ์์ฑ ๋ฐ ๊ฒฐํฉ ์๋ฃ!") |
|
|
print(f"LATEST_VIDEO_PATH:{final_video_path}") |
|
|
|
|
|
|
|
|
base, ext = os.path.splitext(final_video_path) |
|
|
original_copy_path = f"{base}_original{ext}" |
|
|
try: |
|
|
shutil.copy2(final_video_path, original_copy_path) |
|
|
print(f"โ
์๋ณธ ๋ณต์ฌ๋ณธ ์์ฑ ์๋ฃ: {original_copy_path}") |
|
|
print(f"ORIGINAL_COPY_PATH:{original_copy_path}") |
|
|
except Exception as e: |
|
|
print(f"โ ์๋ณธ ๋ณต์ฌ๋ณธ ์์ฑ ์คํจ: {e}") |
|
|
else: |
|
|
print(f"โ ์ต์ข
๋น๋์ค ํ์ผ ๊ฒฝ๋ก๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.") |
|
|
|
|
|
if __name__ == "__main__": |
|
|
main() |
|
|
|