File size: 5,915 Bytes
4edb0a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# ==================================================================================================
# ZERO-SHOT-VIDEO-GENERATION - gradio_utils.py (Interface Utilities)
# ==================================================================================================
# 
# 📝 DESCRIPTION
# This utility module provides essential helper functions for the Gradio web interface. It acts 
# as an intermediary data transformation layer, managing the resolution of internal asset paths, 
# interpreting user interactions across various deployment modalities (e.g., Canny edge detection, 
# Pose estimation, Dreambooth fine-tuning), and structurally validating input/output pathways 
# ensuring consistency during the text-to-video associative processing sequences.
#
# 👤 AUTHORS
# - Amey Thakur (https://github.com/Amey-Thakur)
#
# 🤝🏻 CREDITS
# Based directly on the foundational logic of Text2Video-Zero.
# Source Authors: Picsart AI Research (PAIR), UT Austin, U of Oregon, UIUC
# Reference: https://arxiv.org/abs/2303.13439
#
# 🔗 PROJECT LINKS
# Repository: https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION
# Live Demo: https://huggingface.co/spaces/ameythakur/Zero-Shot-Video-Generation
# Video Demo: https://youtu.be/za9hId6UPoY
#
# 📅 RELEASE DATE
# November 22, 2023
#
# 📜 LICENSE
# Released under the MIT License
# ==================================================================================================

import os

# --- CONTROLNET: CANNY EDGE UTILITIES ---
# These functions map symbolic interface selections (like predefined edge maps) to their 
# corresponding physical file paths within the asset directory, ensuring strict structural validation.

def edge_path_to_video_path(edge_path):
    """
    Resolve an edge-map selection to the video file that should be processed.

    Only the final "/"-separated component of ``edge_path`` is inspected. When it matches
    one of the bundled example names (e.g. ``"butterfly.mp4"``), the corresponding file
    under ``__assets__/canny_videos_mp4/`` is returned; any other value is returned unchanged.

    Args:
        edge_path: Path (or bare file name) of the selected edge video.

    Returns:
        The path of the video to load.

    Raises:
        AssertionError: If the resolved path does not point to an existing file.
    """
    bundled_names = (
        "butterfly.mp4",
        "deer.mp4",
        "fox.mp4",
        "girl_dancing.mp4",
        "girl_turning.mp4",
        "halloween.mp4",
        "santa.mp4",
    )

    vid_name = edge_path.split("/")[-1]
    if vid_name in bundled_names:
        video_path = "__assets__/canny_videos_mp4/" + vid_name
    else:
        video_path = edge_path

    # Raised explicitly instead of using an `assert` statement so the check still runs under
    # `python -O`; AssertionError is kept so existing callers observe the same exception type.
    if not os.path.isfile(video_path):
        raise AssertionError("Video file not found: {!r}".format(video_path))
    return video_path


# --- CONTROLNET: POSE ESTIMATION UTILITIES ---
def motion_to_video_path(motion):
    """
    Translate a motion label (e.g. 'Dance 1') into the matching bundled pose-skeleton video.

    The label is split on single spaces; when its second token is a decimal number N, the
    N-th entry (1-based) of the bundled skeleton videos is returned. The bundled assets are
    ``.mp4`` files stored under ``__assets__/poses_skeleton_gifs/``. Any other value is
    returned unchanged.

    Args:
        motion: Motion label or an arbitrary string to pass through.

    Returns:
        The bundled video path for a numbered label, otherwise ``motion`` itself.

    Raises:
        IndexError: If the number in the label is outside ``1..len(videos)``.
    """
    videos = [
        "__assets__/poses_skeleton_gifs/dance1_corr.mp4",
        "__assets__/poses_skeleton_gifs/dance2_corr.mp4",
        "__assets__/poses_skeleton_gifs/dance3_corr.mp4",
        "__assets__/poses_skeleton_gifs/dance4_corr.mp4",
        "__assets__/poses_skeleton_gifs/dance5_corr.mp4"
    ]
    tokens = motion.split(" ")

    # isdecimal() only accepts digits that int() can parse; isnumeric() also accepts
    # characters such as superscripts and fractions, on which int() raises ValueError.
    if len(tokens) > 1 and tokens[1].isdecimal():
        index = int(tokens[1]) - 1
        # Without this check a label numbered 0 would index -1 and silently return the
        # last preset instead of failing.
        if not 0 <= index < len(videos):
            raise IndexError(
                "Motion preset {} is out of range; expected 1-{}".format(tokens[1], len(videos))
            )
        return videos[index]

    return motion


# --- DREAMBOOTH: ZERO-SHOT INCORPORATION UTILITIES ---
def get_video_from_canny_selection(canny_selection):
    """
    Resolve a Dreambooth base-video selection to the video file that should be processed.

    The preset names ``"woman1"``, ``"woman2"``, ``"man1"`` and ``"woman3"`` map to the
    file of the same name under ``__assets__/db_files_2fps/``; any other value is treated
    as a path and used unchanged.

    Args:
        canny_selection: Preset name or a path to a video file.

    Returns:
        The path of the video to load.

    Raises:
        AssertionError: If the resolved path does not point to an existing file.
    """
    preset_names = ("woman1", "woman2", "man1", "woman3")

    if canny_selection in preset_names:
        input_video_path = "__assets__/db_files_2fps/{}.mp4".format(canny_selection)
    else:
        input_video_path = canny_selection

    # Raised explicitly instead of using an `assert` statement so the check still runs under
    # `python -O`; AssertionError is kept so existing callers observe the same exception type.
    if not os.path.isfile(input_video_path):
        raise AssertionError("Video file not found: {!r}".format(input_video_path))
    return input_video_path


def get_model_from_db_selection(db_selection):
    """
    Map a Dreambooth style label from the interface to its model identifier string.

    The four known labels ("Anime DB", "Avatar DB", "GTA-5 DB", "Arcane DB") resolve to
    the matching ``PAIR/text2video-zero-controlnet-canny-*`` identifier. Any other value
    is returned unchanged.
    """
    label_to_model = (
        ("Anime DB", 'PAIR/text2video-zero-controlnet-canny-anime'),
        ("Avatar DB", 'PAIR/text2video-zero-controlnet-canny-avatar'),
        ("GTA-5 DB", 'PAIR/text2video-zero-controlnet-canny-gta5'),
        ("Arcane DB", 'PAIR/text2video-zero-controlnet-canny-arcane'),
    )

    # Scan with equality comparisons so unknown (or unhashable) selections fall through.
    for label, model_id in label_to_model:
        if db_selection == label:
            return model_id

    return db_selection


def get_db_name_from_id(id):
    """
    Return the Dreambooth style label stored at position ``id``.

    Positions run in the order Anime, Arcane, GTA-5, Avatar. Standard list indexing
    applies, so an out-of-range ``id`` raises IndexError.
    """
    return [
        "Anime DB",
        "Arcane DB",
        "GTA-5 DB",
        "Avatar DB",
    ][id]


def get_canny_name_from_id(id):
    """
    Return the base-subject preset name stored at position ``id``.

    Positions run in the order woman1, woman2, man1, woman3. Standard list indexing
    applies, so an out-of-range ``id`` raises IndexError.
    """
    return [
        "woman1",
        "woman2",
        "man1",
        "woman3",
    ][id]


# --- WATERMARKING & ATTRIBUTION ---
def logo_name_to_path(name):
    """
    Map a watermark selection to the image path of that watermark.

    The labels 'Picsart AI Research' and 'Text2Video-Zero' resolve to their bundled PNG
    files, and the label 'None' resolves to ``None``. Any other value is returned unchanged.
    """
    watermark_files = {
        'Picsart AI Research': '__assets__/pair_watermark.png',
        'Text2Video-Zero': '__assets__/t2v-z_watermark.png',
        'None': None,
    }
    # Falling back to `name` itself passes unrecognised selections straight through.
    return watermark_files.get(name, name)