import os
import sys
import time

import torch

current_file_path = os.path.abspath(__file__)
project_roots = [
    os.path.dirname(current_file_path),
    os.path.dirname(os.path.dirname(current_file_path)),
    os.path.dirname(os.path.dirname(os.path.dirname(current_file_path))),
]
for project_root in project_roots:
    if project_root not in sys.path:
        sys.path.insert(0, project_root)

from videox_fun.api.api import (infer_forward_api,
                                update_diffusion_transformer_api)
from videox_fun.ui.controller import flow_scheduler_dict
from videox_fun.ui.wan2_2_fun_ui import ui, ui_client, ui_host

if __name__ == "__main__":
    # Choose the UI mode.
    # "normal" is the standard UI, which lets users click to switch models, change model types, and more.
    # "host" is the hosting mode: the model is loaded directly at startup and generation results can be
    #        requested through the API.
    # "client" is the client mode, offering a simple UI that sends requests to a remote API for generation.
    ui_mode = "normal"
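    # Optional override sketch (an addition, not part of the original script): let the
    # mode come from an environment variable so it can be switched without editing the
    # file. "UI_MODE" is a hypothetical variable name chosen here for illustration.
    ui_mode = os.environ.get("UI_MODE", ui_mode)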
    
    # GPU memory mode; choose from [model_full_load, model_cpu_offload, model_cpu_offload_and_qfloat8, sequential_cpu_offload].
    # model_full_load keeps the entire model on the GPU.
    #
    # model_cpu_offload moves the entire model to the CPU after use, which saves some GPU memory.
    #
    # model_cpu_offload_and_qfloat8 moves the entire model to the CPU after use and additionally
    # quantizes the transformer to float8, which saves more GPU memory.
    #
    # sequential_cpu_offload moves each layer of the model to the CPU after use; this is slower
    # but saves a large amount of GPU memory.
    GPU_memory_mode = "sequential_cpu_offload"
    # Compiling gives a speedup when the resolution is fixed, at the cost of a little extra GPU memory.
    # compile_dit is not compatible with fsdp_dit or sequential_cpu_offload.
    compile_dit = False

    # Use torch.float16 if the GPU does not support torch.bfloat16.
    # Some graphics cards, such as the V100 and 2080 Ti, do not support torch.bfloat16.
    weight_dtype = torch.bfloat16
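    # Optional fallback sketch (an addition, not part of the original script):
    # torch.cuda.is_bf16_supported() reports whether the current device supports
    # bfloat16, so unsupported cards drop to float16 automatically.
    if torch.cuda.is_available() and not torch.cuda.is_bf16_supported():
        weight_dtype = torch.float16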

    # Server IP and port
    server_name = "0.0.0.0"
    server_port = 7860

    # Config path
    config_path = "config/wan2.2/wan_civitai_i2v.yaml"
    # The params below are used when ui_mode = "host"
    # Model path of the pretrained model
    model_name = "models/Diffusion_Transformer/Wan2.2-Fun-A14B-InP"
    # "Inpaint" or "Control"
    model_type = "Inpaint"

    if ui_mode == "host":
        demo, controller = ui_host(GPU_memory_mode, flow_scheduler_dict, model_name, model_type, config_path, compile_dit, weight_dtype)
    elif ui_mode == "client":
        demo, controller = ui_client(flow_scheduler_dict, model_name)
    else:
        demo, controller = ui(GPU_memory_mode, flow_scheduler_dict, config_path, compile_dit, weight_dtype)

    def gr_launch():
        # launch gradio
        app, _, _ = demo.queue(status_update_rate=1).launch(
            server_name=server_name,
            server_port=server_port,
            prevent_thread_lock=True
        )
        
        # register the inference and model-update API endpoints on the same app
        infer_forward_api(None, app, controller)
        update_diffusion_transformer_api(None, app, controller)
    
    gr_launch()
        
    # Keep the Python process alive so the Gradio server and API keep running
    while True:
        time.sleep(5)