import os
import sys
import time
import torch
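# Make the repo importable regardless of where this script is launched from:
# add the script's directory and its two parent directories to sys.path.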
current_file_path = os.path.abspath(__file__)
project_roots = [os.path.dirname(current_file_path), os.path.dirname(os.path.dirname(current_file_path)), os.path.dirname(os.path.dirname(os.path.dirname(current_file_path)))]
for project_root in project_roots:
    if project_root not in sys.path:
        sys.path.insert(0, project_root)
from videox_fun.api.api import (infer_forward_api,
update_diffusion_transformer_api)
from videox_fun.ui.controller import flow_scheduler_dict
from videox_fun.ui.wan2_2_fun_ui import ui, ui_client, ui_host
if __name__ == "__main__":
# Choose the ui mode
# "normal" refers to the standard UI, which allows users to click to switch models, change model types, and more.
# "host" represents the hosting mode, where the model is loaded directly at startup and can be accessed via
# the API to return generation results.
# "client" represents the client mode, offering a simple UI that sends requests to a remote API for generation.
ui_mode = "normal"
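    # Optional override (an addition for convenience, not part of the upstream script):
    # pick the mode from the environment, e.g. `UI_MODE=host python app.py`.
    # The variable name UI_MODE is an arbitrary choice here.
    ui_mode = os.environ.get("UI_MODE", ui_mode)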
    # GPU memory mode; choose one of [model_full_load, model_cpu_offload, model_cpu_offload_and_qfloat8, sequential_cpu_offload].
# model_full_load means that the entire model will be moved to the GPU.
#
# model_cpu_offload means that the entire model will be moved to the CPU after use, which can save some GPU memory.
#
    # model_cpu_offload_and_qfloat8 means that the entire model will be moved to the CPU after use,
    # and the transformer is additionally quantized to float8, which saves more GPU memory.
#
# sequential_cpu_offload means that each layer of the model will be moved to the CPU after use,
# resulting in slower speeds but saving a large amount of GPU memory.
GPU_memory_mode = "sequential_cpu_offload"
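    # Fail-fast sanity check (an illustrative addition, not upstream code):
    # catch a typo in GPU_memory_mode here rather than deep inside the model loader.
    assert GPU_memory_mode in [
        "model_full_load", "model_cpu_offload",
        "model_cpu_offload_and_qfloat8", "sequential_cpu_offload",
    ], f"Unknown GPU_memory_mode: {GPU_memory_mode}"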
    # Compiling the DiT gives a speedup at a fixed resolution, at the cost of a little extra GPU memory.
    # compile_dit is not compatible with fsdp_dit or sequential_cpu_offload.
compile_dit = False
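    # Guard for the incompatibility noted above (an illustrative check, not upstream code).
    assert not (compile_dit and GPU_memory_mode == "sequential_cpu_offload"), \
        "compile_dit is not compatible with sequential_cpu_offload"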
    # Use torch.float16 if the GPU does not support torch.bfloat16.
    # Some graphics cards, such as the V100 and 2080 Ti, do not support torch.bfloat16.
weight_dtype = torch.bfloat16
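    # Optional runtime fallback (an illustrative addition; upstream leaves the choice manual):
    # downgrade to float16 when the visible GPU lacks bfloat16 support.
    if torch.cuda.is_available() and not torch.cuda.is_bf16_supported():
        weight_dtype = torch.float16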
    # Server IP and port
server_name = "0.0.0.0"
server_port = 7860
# Config path
config_path = "config/wan2.2/wan_civitai_i2v.yaml"
    # The params below are used when ui_mode = "host".
    # Path to the pretrained model.
model_name = "models/Diffusion_Transformer/Wan2.2-Fun-A14B-InP"
# "Inpaint" or "Control"
model_type = "Inpaint"
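    # Fail-fast path checks for host mode (a sketch added for illustration, not upstream code).
    if ui_mode == "host":
        assert os.path.exists(config_path), f"Config not found: {config_path}"
        assert os.path.exists(model_name), f"Model weights not found: {model_name}"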
if ui_mode == "host":
demo, controller = ui_host(GPU_memory_mode, flow_scheduler_dict, model_name, model_type, config_path, compile_dit, weight_dtype)
elif ui_mode == "client":
demo, controller = ui_client(flow_scheduler_dict, model_name)
else:
demo, controller = ui(GPU_memory_mode, flow_scheduler_dict, config_path, compile_dit, weight_dtype)
def gr_launch():
# launch gradio
app, _, _ = demo.queue(status_update_rate=1).launch(
server_name=server_name,
server_port=server_port,
prevent_thread_lock=True
)
# launch api
infer_forward_api(None, app, controller)
update_diffusion_transformer_api(None, app, controller)
gr_launch()
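    # Example client call against the mounted API (a sketch; the route and payload
    # key below are assumptions, check videox_fun/api/api.py for the real schema):
    #
    #   import requests
    #   requests.post(
    #       "http://127.0.0.1:7860/videox_fun/infer_forward",
    #       json={"prompt_textbox": "A cat walks on the grass, realistic."},
    #   )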
    # Keep the Python process alive so the Gradio server and API keep serving.
while True:
time.sleep(5)