# ==================================================================================================
# ZERO-SHOT-VIDEO-GENERATION - app.py (Primary Application Interface)
# ==================================================================================================
#
# 📝 DESCRIPTION
# This script serves as the main entry point and Gradio-based web interface for the Zero-Shot
# Video Generation framework. It provisions the required neural network models and exposes a
# user-friendly front-end for generating temporally consistent video content from textual prompts.
# The interface is robustly abstracted to handle execution seamlessly across various environments,
# inclusive of local execution and cloud instances.
#
# 👤 AUTHORS
# - Amey Thakur (https://github.com/Amey-Thakur)
#
# 🤝 CREDITS
# Based directly on the foundational logic of Text2Video-Zero.
# Source Authors: Picsart AI Research (PAIR), UT Austin, U of Oregon, UIUC
# Reference: https://arxiv.org/abs/2303.13439
#
# 🔗 PROJECT LINKS
# Repository: https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION
# Live Demo: https://huggingface.co/spaces/ameythakur/Zero-Shot-Video-Generation
# Video Demo: https://youtu.be/za9hId6UPoY
#
# 📅 RELEASE DATE
# November 22, 2023
#
# 📜 LICENSE
# Released under the MIT License
# ==================================================================================================
import warnings
# Suppress unavoidable third-party deprecation warnings (torch.distributed, timm, diffusers).
# These originate inside library internals and cannot be fixed from application code.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, message=".*deprecated.*")
warnings.filterwarnings("ignore", category=UserWarning, message=".*Mapping deprecated.*")
import gradio as gr
import torch
from model import Model, ModelType
from app_text_to_video import create_demo as create_demo_text_to_video
import argparse
import os
# --- ENVIRONMENT & HARDWARE INITIALIZATION ---
# Detect whether we are running inside a Hugging Face Space. The platform always
# injects the SPACE_ID environment variable, so its presence is a robust,
# platform-agnostic signal usable to adapt interface parameters later on.
on_huggingspace = "SPACE_ID" in os.environ

# Prefer the GPU when torch can see one; otherwise fall back to CPU execution.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision keeps GPU memory pressure low during diffusion inference;
# full Float32 precision is the robust computational fallback on CPU.
_model_dtype = torch.float16 if device == "cuda" else torch.float32
model = Model(device=device, dtype=_model_dtype)
# --- CLI ARGUMENTS PARSING ---
# Establishes public accessibility parameters, useful when tunneling standard localhost traffic
# securely for temporary external evaluations over the internet.
parser = argparse.ArgumentParser()
# FIX: corrected grammar in the user-facing help text ("can be access" -> "can be
# accessed") and dropped the redundant default=False — 'store_true' already
# defaults to False, so behavior is unchanged.
parser.add_argument('--public_access', action='store_true',
                    help="if enabled, the app can be accessed from a public url")
args = parser.parse_args()
# --- WEB INTERFACE ARCHITECTURE ---
# Assembles the Gradio Application Block layout, injecting structured HTML context and
# encapsulating the discrete video synthesis module instance utilizing the neural pipeline.
# FIX: two user-visible strings below were mojibake from a UTF-8/Latin-1 mix-up —
# "π₯" restored to the camera emoji 🎥 (matching the .camera-anim CSS class) and
# "Β©" restored to the copyright sign ©.
with gr.Blocks() as demo:
    # Hero banner: animated, clickable title card linking to the project repository.
    gr.HTML(
        """
        <style>
        .title-link {
            color: white !important;
            text-decoration: none !important;
            border-bottom: none !important;
            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
            display: block;
        }
        .title-link:hover {
            transform: scale(1.01);
            text-shadow: 0 0 20px rgba(255,255,255,0.3);
            cursor: pointer;
        }
        @keyframes floating {
            0% { transform: translateY(0px) rotate(0deg); }
            25% { transform: translateY(-5px) rotate(-5deg); }
            75% { transform: translateY(5px) rotate(5deg); }
            100% { transform: translateY(0px) rotate(0deg); }
        }
        .camera-anim {
            display: inline-block;
            animation: floating 4s infinite ease-in-out;
            margin-right: 10px;
        }
        </style>
        <div style="background: linear-gradient(135deg, #4A00E0 0%, #8E2DE2 100%); padding: 3rem; border-radius: 20px; text-align: center; margin-bottom: 2rem; box-shadow: 0 10px 30px rgba(0,0,0,0.1);">
            <a href="https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION" target="_blank" class="title-link">
                <h1 style="color: white; font-size: 3.5rem; font-weight: 800; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.2); letter-spacing: -1px;">
                    <span class="camera-anim">🎥</span> Zero-Shot Video Generation
                </h1>
            </a>
            <p style="color: rgba(255,255,255,0.9); font-size: 1.3rem; margin-top: 1rem; font-weight: 500;">
                Text-to-Video Studio using Temporal Latent Warping & Cross-Frame Attention
            </p>
        </div>
        """
    )
    with gr.Tab('Zero-Shot Text2Video'):
        # Invoke the pre-defined layout specific to the Text-to-Video generative logic, passing
        # the initialized main diffusion model capable of handling the temporal latent inference.
        create_demo_text_to_video(model)
    # Footer: copyright attribution plus research-foundation credits.
    gr.HTML(
        """
        <div style="text-align: center; margin-top: 3rem; padding: 2.5rem; border-radius: 15px; background: rgba(142, 45, 226, 0.05); border: 1px solid rgba(142, 45, 226, 0.1);">
            <p style="color: #4A00E0; font-size: 1rem; font-weight: 600; margin: 0;">
                © 2023 <a href="https://github.com/Amey-Thakur" target="_blank" style="color: #8E2DE2; text-decoration: none !important; border-bottom: none !important; transition: all 0.3s ease;">Amey Thakur</a> | University of Windsor
            </p>
            <p style="color: #777; font-size: 0.85rem; margin-top: 0.75rem; max-width: 600px; margin-left: auto; margin-right: auto; line-height: 1.5;">
                <b>Research Foundation:</b> Based on foundational breakthroughs in zero-shot temporal consistency by Picsart AI Research (PAIR), UT Austin, U of Oregon, and UIUC.
            </p>
        </div>
        """
    )
# --- APPLICATION DEPLOYMENT ALGORITHM ---
# Deploys the constructed graphical interface. Queuing is enabled in both branches
# so concurrent generation requests are serialized rather than over-saturating
# the execution threads.
# NOTE(review): current Gradio takes theme= on gr.Blocks(...), not launch() —
# confirm the pinned Gradio version accepts it here.
_soft_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")

if on_huggingspace:
    # Hosted Hugging Face Space: verbose debug logging, server-side rendering off.
    demo.queue().launch(
        debug=True,
        ssr_mode=False,
        theme=_soft_theme
    )
else:
    # Local / self-hosted run: optionally expose a public share URL (--public_access).
    # launch() returns (app, local_url, share_url); share_url is the third element.
    # NOTE(review): share_url is None when share=False, so this may print "None" —
    # confirm that is acceptable.
    _, _, link = demo.queue().launch(
        allowed_paths=['temporal'],
        share=args.public_access,
        css='style.css',
        theme=_soft_theme
    )
    print(link)