# ================================================================================================== # ZERO-SHOT-VIDEO-GENERATION - app.py (Primary Application Interface) # ================================================================================================== # # 📝 DESCRIPTION # This script serves as the main entry point and Gradio-based web interface for the Zero-Shot # Video Generation framework. It provisions the required neural network models and exposes a # user-friendly front-end for generating temporally consistent video content from textual prompts. # The interface is robustly abstracted to handle execution seamlessly across various environments, # inclusive of local execution and cloud instances. # # 👤 AUTHORS # - Amey Thakur (https://github.com/Amey-Thakur) # # 🤝🏻 CREDITS # Based directly on the foundational logic of Text2Video-Zero. # Source Authors: Picsart AI Research (PAIR), UT Austin, U of Oregon, UIUC # Reference: https://arxiv.org/abs/2303.13439 # # 🔗 PROJECT LINKS # Repository: https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION # Live Demo: https://huggingface.co/spaces/ameythakur/Zero-Shot-Video-Generation # Video Demo: https://youtu.be/za9hId6UPoY # # 📅 RELEASE DATE # November 22, 2023 # # 📜 LICENSE # Released under the MIT License # ================================================================================================== import warnings # Suppress unavoidable third-party deprecation warnings (torch.distributed, timm, diffusers). # These originate inside library internals and cannot be fixed from application code. warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", category=UserWarning, message=".*deprecated.*") warnings.filterwarnings("ignore", category=UserWarning, message=".*Mapping deprecated.*") import gradio as gr import torch from model import Model, ModelType from app_text_to_video import create_demo as create_demo_text_to_video import argparse import os # --- ENVIRONMENT & HARDWARE INITIALIZATION --- # Identify the operational environment to conditionally adapt interface parameters. # Checking for 'SPACE_ID' is the robust, platform-agnostic way to detect a Hugging Face Space. on_huggingspace = os.environ.get("SPACE_ID") is not None device = "cuda" if torch.cuda.is_available() else "cpu" # Instantiate the primary generative diffusion model employing Float16 on GPU resources # for memory-efficient tensor operations, and Float32 as a robust computational fallback. model = Model(device=device, dtype=torch.float16 if device == "cuda" else torch.float32) # --- CLI ARGUMENTS PARSING --- # Establishes public accessibility parameters, useful when tunneling standard localhost traffic # securely for temporary external evaluations over the internet. parser = argparse.ArgumentParser() parser.add_argument('--public_access', action='store_true', help="if enabled, the app can be access from a public url", default=False) args = parser.parse_args() # --- WEB INTERFACE ARCHITECTURE --- # Assembles the Gradio Application Block layout, injecting structured HTML context and # encapsulating the discrete video synthesis module instance utilizing the neural pipeline. with gr.Blocks() as demo: gr.HTML( """
Text-to-Video Studio using Temporal Latent Warping & Cross-Frame Attention
© 2023 Amey Thakur | University of Windsor
Research Foundation: Based on foundational breakthroughs in zero-shot temporal consistency by Picsart AI Research (PAIR), UT Austin, U of Oregon, and UIUC.