# Hugging Face Hub page residue (not part of the program source):
# uploader: ameythakur | file: text2video | commit: 57868fa (verified)
# ==================================================================================================
# ZERO-SHOT-VIDEO-GENERATION - app.py (Primary Application Interface)
# ==================================================================================================
#
# πŸ“ DESCRIPTION
# This script serves as the main entry point and Gradio-based web interface for the Zero-Shot
# Video Generation framework. It provisions the required neural network models and exposes a
# user-friendly front-end for generating temporally consistent video content from textual prompts.
# The interface is robustly abstracted to handle execution seamlessly across various environments,
# inclusive of local execution and cloud instances.
#
# 👤 AUTHORS
# - Amey Thakur (https://github.com/Amey-Thakur)
#
# 🤝 CREDITS
# Based directly on the foundational logic of Text2Video-Zero.
# Source Authors: Picsart AI Research (PAIR), UT Austin, U of Oregon, UIUC
# Reference: https://arxiv.org/abs/2303.13439
#
# 🔗 PROJECT LINKS
# Repository: https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION
# Live Demo: https://huggingface.co/spaces/ameythakur/Zero-Shot-Video-Generation
# Video Demo: https://youtu.be/za9hId6UPoY
#
# 📅 RELEASE DATE
# November 22, 2023
#
# 📜 LICENSE
# Released under the MIT License
# ==================================================================================================
import warnings
# Suppress unavoidable third-party deprecation warnings (torch.distributed, timm, diffusers).
# These originate inside library internals and cannot be fixed from application code.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, message=".*deprecated.*")
warnings.filterwarnings("ignore", category=UserWarning, message=".*Mapping deprecated.*")
import gradio as gr
import torch
from model import Model, ModelType
from app_text_to_video import create_demo as create_demo_text_to_video
import argparse
import os
# --- ENVIRONMENT & HARDWARE INITIALIZATION ---
# Hugging Face Spaces inject SPACE_ID into the process environment, so its
# presence is a robust, platform-agnostic way to detect a Space deployment.
on_huggingspace = "SPACE_ID" in os.environ
# Prefer the GPU whenever torch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision (float16) keeps GPU memory usage low during diffusion;
# float32 is the robust computational fallback for CPU execution.
dtype = torch.float16 if device == "cuda" else torch.float32
model = Model(device=device, dtype=dtype)
# --- CLI ARGUMENTS PARSING ---
# Establishes public accessibility parameters, useful when tunneling standard localhost traffic
# securely for temporary external evaluations over the internet.
parser = argparse.ArgumentParser()
# action='store_true' already implies a default of False, so no explicit default is needed.
parser.add_argument('--public_access', action='store_true',
                    help="if enabled, the app can be accessed from a public url")
# parse_known_args() tolerates extra argv entries injected by managed launchers
# (Hugging Face Spaces, notebook kernels, test runners) instead of exiting with
# an "unrecognized arguments" error the way parse_args() would.
args, _ = parser.parse_known_args()
# --- WEB INTERFACE ARCHITECTURE ---
# Assembles the Gradio Application Block layout, injecting structured HTML context and
# encapsulating the discrete video synthesis module instance utilizing the neural pipeline.
with gr.Blocks() as demo:
    # Page header: a gradient banner linking to the repository. The inline <style>
    # block defines the hover transition for the title link and the "floating"
    # keyframe animation applied to the camera emoji.
    gr.HTML(
        """
        <style>
        .title-link {
            color: white !important;
            text-decoration: none !important;
            border-bottom: none !important;
            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
            display: block;
        }
        .title-link:hover {
            transform: scale(1.01);
            text-shadow: 0 0 20px rgba(255,255,255,0.3);
            cursor: pointer;
        }
        @keyframes floating {
            0% { transform: translateY(0px) rotate(0deg); }
            25% { transform: translateY(-5px) rotate(-5deg); }
            75% { transform: translateY(5px) rotate(5deg); }
            100% { transform: translateY(0px) rotate(0deg); }
        }
        .camera-anim {
            display: inline-block;
            animation: floating 4s infinite ease-in-out;
            margin-right: 10px;
        }
        </style>
        <div style="background: linear-gradient(135deg, #4A00E0 0%, #8E2DE2 100%); padding: 3rem; border-radius: 20px; text-align: center; margin-bottom: 2rem; box-shadow: 0 10px 30px rgba(0,0,0,0.1);">
            <a href="https://github.com/Amey-Thakur/ZERO-SHOT-VIDEO-GENERATION" target="_blank" class="title-link">
                <h1 style="color: white; font-size: 3.5rem; font-weight: 800; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.2); letter-spacing: -1px;">
                    <span class="camera-anim">πŸŽ₯</span> Zero-Shot Video Generation
                </h1>
            </a>
            <p style="color: rgba(255,255,255,0.9); font-size: 1.3rem; margin-top: 1rem; font-weight: 500;">
                Text-to-Video Studio using Temporal Latent Warping & Cross-Frame Attention
            </p>
        </div>
        """
    )
    with gr.Tab('Zero-Shot Text2Video'):
        # Invoke the pre-defined layout specific to the Text-to-Video generative logic, passing
        # the initialized main diffusion model capable of handling the temporal latent inference.
        create_demo_text_to_video(model)
    # Page footer: copyright line and research attribution rendered below the tab.
    gr.HTML(
        """
        <div style="text-align: center; margin-top: 3rem; padding: 2.5rem; border-radius: 15px; background: rgba(142, 45, 226, 0.05); border: 1px solid rgba(142, 45, 226, 0.1);">
            <p style="color: #4A00E0; font-size: 1rem; font-weight: 600; margin: 0;">
                Β© 2023 <a href="https://github.com/Amey-Thakur" target="_blank" style="color: #8E2DE2; text-decoration: none !important; border-bottom: none !important; transition: all 0.3s ease;">Amey Thakur</a> | University of Windsor
            </p>
            <p style="color: #777; font-size: 0.85rem; margin-top: 0.75rem; max-width: 600px; margin-left: auto; margin-right: auto; line-height: 1.5;">
                <b>Research Foundation:</b> Based on foundational breakthroughs in zero-shot temporal consistency by Picsart AI Research (PAIR), UT Austin, U of Oregon, and UIUC.
            </p>
        </div>
        """
    )
# --- APPLICATION DEPLOYMENT ALGORITHM ---
# Deploys the constructed graphical interface. Configures queuing mechanisms intrinsically to
# prevent execution thread over-saturation during concurrent generation requests.
if on_huggingspace:
    # Hugging Face Spaces branch: debug=True keeps the process attached so logs
    # stream to the Space console; ssr_mode=False disables server-side rendering.
    # NOTE(review): `theme=` is normally a gr.Blocks() constructor argument, not a
    # launch() argument — confirm the pinned Gradio version actually accepts it
    # here, otherwise it is ignored or raises a TypeError.
    demo.queue().launch(
        debug=True,
        ssr_mode=False,
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
    )
else:
    # Local branch: optionally expose a public share URL (--public_access) and
    # allow Gradio to serve generated files from the 'temporal' directory.
    # NOTE(review): as above, `css=` and `theme=` are usually gr.Blocks()
    # constructor arguments; verify launch() accepts them in this Gradio version.
    _, _, link = demo.queue().launch(
        allowed_paths=['temporal'],
        share=args.public_access,
        css='style.css',
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
    )
    # launch() returns a 3-tuple (presumably app, local URL, share URL); the
    # share URL is only populated when share=True, so this may print "None"
    # for purely local runs — TODO confirm and guard if undesired.
    print(link)