# Hugging Face Hub page residue (not part of the program source):
# uploader: ameythakur | file: text2video | commit: 57868fa (verified)
# ==================================================================================================
# ZERO-SHOT-VIDEO-GENERATION - app.py (Primary Application Interface)
# ==================================================================================================
#
# πŸ“ DESCRIPTION
# This script serves as the main entry point and Gradio-based web interface for the Zero-Shot
# Video Generation framework. It provisions the required neural network models and exposes a
# user-friendly front-end for generating temporally consistent video content from textual prompts.
# The interface is robustly abstracted to handle execution seamlessly across various environments,
# inclusive of local execution and cloud instances.
#
# 👤 AUTHORS
# - Amey Thakur (https://github.com/Amey-Thakur)
#
# 🤝 CREDITS
# Based directly on the foundational logic of Text2Video-Zero.
# Source Authors: Picsart AI Research (PAIR), UT Austin, U of Oregon, UIUC
# Reference: https://arxiv.org/abs/2303.13439
#
# 🔗 PROJECT LINKS
# Repository: https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION
# Live Demo: https://huggingface.co/spaces/ameythakur/Zero-Shot-Video-Generation
# Video Demo: https://youtu.be/za9hId6UPoY
#
# 📅 RELEASE DATE
# November 22, 2023
#
# 📜 LICENSE
# Released under the MIT License
# ==================================================================================================
import warnings
# Suppress unavoidable third-party deprecation warnings (torch.distributed, timm, diffusers).
# These originate inside library internals and cannot be fixed from application code.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, message=".*deprecated.*")
warnings.filterwarnings("ignore", category=UserWarning, message=".*Mapping deprecated.*")
import gradio as gr
import torch
from model import Model, ModelType
from app_text_to_video import create_demo as create_demo_text_to_video
import argparse
import os
# --- ENVIRONMENT & HARDWARE INITIALIZATION ---
# Hugging Face Spaces inject SPACE_ID into the process environment, so its
# presence is a robust, platform-agnostic way to detect a Space deployment.
on_huggingspace = "SPACE_ID" in os.environ
# Prefer the GPU whenever torch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision (float16) keeps GPU memory usage low during diffusion;
# float32 is the robust computational fallback for CPU execution.
dtype = torch.float16 if device == "cuda" else torch.float32
model = Model(device=device, dtype=dtype)
# --- CLI ARGUMENTS PARSING ---
# Establishes public accessibility parameters, useful when tunneling standard localhost traffic
# securely for temporary external evaluations over the internet.
parser = argparse.ArgumentParser()
# action='store_true' already implies a default of False, so no explicit default is needed.
parser.add_argument('--public_access', action='store_true',
                    help="if enabled, the app can be accessed from a public url")
# parse_known_args() tolerates extra argv entries injected by managed launchers
# (Hugging Face Spaces, notebook kernels, test runners) instead of exiting with
# an "unrecognized arguments" error the way parse_args() would.
args, _ = parser.parse_known_args()
# --- WEB INTERFACE ARCHITECTURE ---
# Assembles the Gradio Application Block layout, injecting structured HTML context and
# encapsulating the discrete video synthesis module instance utilizing the neural pipeline.
with gr.Blocks() as demo:
    # Page header: a gradient banner linking to the repository. The inline <style>
    # block defines the hover transition for the title link and the "floating"
    # keyframe animation applied to the camera emoji.
    gr.HTML(
        """
        <style>
        .title-link {
            color: white !important;
            text-decoration: none !important;
            border-bottom: none !important;
            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
            display: block;
        }
        .title-link:hover {
            transform: scale(1.01);
            text-shadow: 0 0 20px rgba(255,255,255,0.3);
            cursor: pointer;
        }
        @keyframes floating {
            0% { transform: translateY(0px) rotate(0deg); }
            25% { transform: translateY(-5px) rotate(-5deg); }
            75% { transform: translateY(5px) rotate(5deg); }
            100% { transform: translateY(0px) rotate(0deg); }
        }
        .camera-anim {
            display: inline-block;
            animation: floating 4s infinite ease-in-out;
            margin-right: 10px;
        }
        </style>
        <div style="background: linear-gradient(135deg, #4A00E0 0%, #8E2DE2 100%); padding: 3rem; border-radius: 20px; text-align: center; margin-bottom: 2rem; box-shadow: 0 10px 30px rgba(0,0,0,0.1);">
            <a href="https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION" target="_blank" class="title-link">
                <h1 style="color: white; font-size: 3.5rem; font-weight: 800; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.2); letter-spacing: -1px;">
                    <span class="camera-anim">πŸŽ₯</span> Zero-Shot Video Generation
                </h1>
            </a>
            <p style="color: rgba(255,255,255,0.9); font-size: 1.3rem; margin-top: 1rem; font-weight: 500;">
                Text-to-Video Studio using Temporal Latent Warping & Cross-Frame Attention
            </p>
        </div>
        """
    )
    with gr.Tab('Zero-Shot Text2Video'):
        # Invoke the pre-defined layout specific to the Text-to-Video generative logic, passing
        # the initialized main diffusion model capable of handling the temporal latent inference.
        create_demo_text_to_video(model)
    # Page footer: copyright line and research attribution rendered below the tab.
    gr.HTML(
        """
        <div style="text-align: center; margin-top: 3rem; padding: 2.5rem; border-radius: 15px; background: rgba(142, 45, 226, 0.05); border: 1px solid rgba(142, 45, 226, 0.1);">
            <p style="color: #4A00E0; font-size: 1rem; font-weight: 600; margin: 0;">
                Β© 2023 <a href="https://github.com/Amey-Thakur" target="_blank" style="color: #8E2DE2; text-decoration: none !important; border-bottom: none !important; transition: all 0.3s ease;">Amey Thakur</a> | University of Windsor
            </p>
            <p style="color: #777; font-size: 0.85rem; margin-top: 0.75rem; max-width: 600px; margin-left: auto; margin-right: auto; line-height: 1.5;">
                <b>Research Foundation:</b> Based on foundational breakthroughs in zero-shot temporal consistency by Picsart AI Research (PAIR), UT Austin, U of Oregon, and UIUC.
            </p>
        </div>
        """
    )
# --- APPLICATION DEPLOYMENT ALGORITHM ---
# Deploys the constructed graphical interface. Configures queuing mechanisms intrinsically to
# prevent execution thread over-saturation during concurrent generation requests.
if on_huggingspace:
    # Hugging Face Spaces branch: debug=True keeps the process attached so logs
    # stream to the Space console; ssr_mode=False disables server-side rendering.
    # NOTE(review): `theme=` is normally a gr.Blocks() constructor argument, not a
    # launch() argument — confirm the pinned Gradio version actually accepts it
    # here, otherwise it is ignored or raises a TypeError.
    demo.queue().launch(
        debug=True,
        ssr_mode=False,
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
    )
else:
    # Local branch: optionally expose a public share URL (--public_access) and
    # allow Gradio to serve generated files from the 'temporal' directory.
    # NOTE(review): as above, `css=` and `theme=` are usually gr.Blocks()
    # constructor arguments; verify launch() accepts them in this Gradio version.
    _, _, link = demo.queue().launch(
        allowed_paths=['temporal'],
        share=args.public_access,
        css='style.css',
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
    )
    # launch() returns a 3-tuple (presumably app, local URL, share URL); the
    # share URL is only populated when share=True, so this may print "None"
    # for purely local runs — TODO confirm and guard if undesired.
    print(link)