# [hosting-page residue] ameythakur's picture
# [hosting-page residue] text2video
# [hosting-page residue] 4edb0a5 verified
# ==================================================================================================
# ZERO-SHOT-VIDEO-GENERATION - gradio_utils.py (Interface Utilities)
# ==================================================================================================
#
# 📝 DESCRIPTION
# This utility module provides essential helper functions for the Gradio web interface. It acts
# as an intermediary data transformation layer, managing the resolution of internal asset paths,
# interpreting user interactions across various deployment modalities (e.g., Canny edge detection,
# Pose estimation, Dreambooth fine-tuning), and structurally validating input/output pathways
# ensuring consistency during the text-to-video associative processing sequences.
#
# 👤 AUTHORS
# - Amey Thakur (https://github.com/Amey-Thakur)
#
# 🤝🏻 CREDITS
# Based directly on the foundational logic of Text2Video-Zero.
# Source Authors: Picsart AI Research (PAIR), UT Austin, U of Oregon, UIUC
# Reference: https://arxiv.org/abs/2303.13439
#
# 🔗 PROJECT LINKS
# Repository: https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION
# Live Demo: https://huggingface.co/spaces/ameythakur/Zero-Shot-Video-Generation
# Video Demo: https://youtu.be/za9hId6UPoY
#
# 📅 RELEASE DATE
# November 22, 2023
#
# 📜 LICENSE
# Released under the MIT License
# ==================================================================================================
import os
# --- CONTROLNET: CANNY EDGE UTILITIES ---
# These functions map symbolic interface selections (like predefined edge maps) to their
# corresponding physical file paths within the asset directory, ensuring strict structural validation.
def edge_path_to_video_path(edge_path):
    """
    Resolve a Canny edge-map selection to the video file that should be loaded.

    Only the final "/"-separated component of ``edge_path`` is inspected. When it
    is the file name of one of the bundled example clips, the path of that clip
    inside ``__assets__/canny_videos_mp4`` is returned; any other value is passed
    through unchanged.

    Args:
        edge_path: Path (or bare file name) of the selected edge-map video.

    Returns:
        The path of the video file to use.

    Raises:
        AssertionError: If the resolved path is not an existing regular file.
    """
    bundled_clips = frozenset((
        "butterfly.mp4",
        "deer.mp4",
        "fox.mp4",
        "girl_dancing.mp4",
        "girl_turning.mp4",
        "halloween.mp4",
        "santa.mp4",
    ))
    file_name = edge_path.split("/")[-1]
    if file_name in bundled_clips:
        video_path = "__assets__/canny_videos_mp4/" + file_name
    else:
        video_path = edge_path
    # Raised explicitly instead of using an `assert` statement: asserts are removed
    # when Python runs with -O, which would silently disable this check. The
    # exception type is kept as AssertionError so existing callers see no change.
    if not os.path.isfile(video_path):
        raise AssertionError("Video file not found: {!r}".format(video_path))
    return video_path
# --- CONTROLNET: POSE ESTIMATION UTILITIES ---
def motion_to_video_path(motion):
    """
    Map a motion label such as 'Dance 1' to its bundled pose-skeleton video.

    The label is split on single spaces and the second token is read as a
    1-based index into the five bundled clips under
    ``__assets__/poses_skeleton_gifs``.

    Args:
        motion: A label of the form '<word> <number>', or any other string.

    Returns:
        The bundled clip path when the number is between 1 and 5 inclusive;
        otherwise ``motion`` itself, unchanged.
    """
    videos = [
        "__assets__/poses_skeleton_gifs/dance1_corr.mp4",
        "__assets__/poses_skeleton_gifs/dance2_corr.mp4",
        "__assets__/poses_skeleton_gifs/dance3_corr.mp4",
        "__assets__/poses_skeleton_gifs/dance4_corr.mp4",
        "__assets__/poses_skeleton_gifs/dance5_corr.mp4",
    ]
    tokens = motion.split(" ")
    # isdecimal() rather than isnumeric(): isnumeric() also accepts characters
    # such as superscripts and fractions that int() cannot parse.
    if len(tokens) > 1 and tokens[1].isdecimal():
        index = int(tokens[1]) - 1
        # Bounds are checked explicitly so that '... 0' does not wrap around to
        # the last clip through negative indexing, and numbers past the end fall
        # through to the pass-through return instead of raising IndexError.
        if 0 <= index < len(videos):
            return videos[index]
    return motion
# --- DREAMBOOTH: ZERO-SHOT INCORPORATION UTILITIES ---
def get_video_from_canny_selection(canny_selection):
    """
    Resolve a Dreambooth subject selection to the video file that should be loaded.

    The names 'woman1', 'woman2', 'man1' and 'woman3' map to the matching
    ``.mp4`` file under ``__assets__/db_files_2fps``; any other value is used
    as a path as-is.

    Args:
        canny_selection: One of the bundled subject names, or a video path.

    Returns:
        The path of the video file to use.

    Raises:
        AssertionError: If the resolved path is not an existing regular file.
    """
    bundled_subjects = ("woman1", "woman2", "man1", "woman3")
    if canny_selection in bundled_subjects:
        input_video_path = "__assets__/db_files_2fps/" + canny_selection + ".mp4"
    else:
        input_video_path = canny_selection
    # Raised explicitly instead of using an `assert` statement: asserts are removed
    # when Python runs with -O, which would silently disable this check. The
    # exception type is kept as AssertionError so existing callers see no change.
    if not os.path.isfile(input_video_path):
        raise AssertionError("Video file not found: {!r}".format(input_video_path))
    return input_video_path
def get_model_from_db_selection(db_selection):
    """
    Convert a Dreambooth style label into its model identifier string.

    The four known labels ('Anime DB', 'Avatar DB', 'GTA-5 DB', 'Arcane DB') each
    map to a 'PAIR/text2video-zero-controlnet-canny-*' identifier. Any other
    value is returned unchanged.
    """
    known_models = (
        ("Anime DB", 'PAIR/text2video-zero-controlnet-canny-anime'),
        ("Avatar DB", 'PAIR/text2video-zero-controlnet-canny-avatar'),
        ("GTA-5 DB", 'PAIR/text2video-zero-controlnet-canny-gta5'),
        ("Arcane DB", 'PAIR/text2video-zero-controlnet-canny-arcane'),
    )
    for label, model_id in known_models:
        if db_selection == label:
            return model_id
    # Not a known label: hand the value back untouched.
    return db_selection
def get_db_name_from_id(id):
    """
    Return the Dreambooth style label stored at position ``id``.

    The labels are held in the order 'Anime DB', 'Arcane DB', 'GTA-5 DB',
    'Avatar DB' and looked up with ordinary list indexing.
    """
    style_labels = [
        "Anime DB",
        "Arcane DB",
        "GTA-5 DB",
        "Avatar DB",
    ]
    selected = style_labels[id]
    return selected
def get_canny_name_from_id(id):
    """
    Return the base subject name stored at position ``id``.

    The names are held in the order 'woman1', 'woman2', 'man1', 'woman3' and
    looked up with ordinary list indexing.
    """
    subject_names = [
        "woman1",
        "woman2",
        "man1",
        "woman3",
    ]
    selected = subject_names[id]
    return selected
# --- WATERMARKING & ATTRIBUTION ---
def logo_name_to_path(name):
    """
    Translate a watermark selection into the path of its image file.

    'Picsart AI Research' and 'Text2Video-Zero' map to PNG files under
    ``__assets__``; the string 'None' maps to the ``None`` object. Any other
    value is returned unchanged.
    """
    watermark_files = {
        'Picsart AI Research': '__assets__/pair_watermark.png',
        'Text2Video-Zero': '__assets__/t2v-z_watermark.png',
        'None': None,
    }
    # Using the input itself as the default gives the pass-through behaviour.
    return watermark_files.get(name, name)