Spaces:

morbiwalaq
/

Text2Vid-AI

Runtime error

App Files Files Community

Text2Vid-AI / app.py

morbiwalaq

Update app.py

f21ef4e verified about 1 year ago

raw

history blame contribute delete

3.1 kB

	import torch
	import gradio as gr
	from diffusers import CogVideoXPipeline, CogVideoXDPMScheduler, CogVideoXTransformer3DModel
	from huggingface_hub import hf_hub_download, snapshot_download

	# Choose the compute device: CUDA when torch reports it as available, CPU otherwise.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Fetch auxiliary weights (a Real-ESRGAN x4 checkpoint and a RIFE snapshot) into
	# local directories. NOTE(review): nothing in the visible file reads these
	# directories afterwards; presumably intended for upscaling / interpolation.
	hf_hub_download(
	    repo_id="ai-forever/Real-ESRGAN",
	    filename="RealESRGAN_x4.pth",
	    local_dir="model_real_esran",
	)
	snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")

	# Load the text-to-video pipeline in bfloat16 and move it to the chosen device.
	# (Swap the model ID here to use a different checkpoint.)
	pipe = CogVideoXPipeline.from_pretrained(
	    "THUDM/CogVideoX-5b",
	    torch_dtype=torch.bfloat16,
	)
	pipe = pipe.to(device)

	# Replace the pipeline's scheduler with a DPM scheduler built from the same
	# config, using "trailing" timestep spacing.
	pipe.scheduler = CogVideoXDPMScheduler.from_config(
	    pipe.scheduler.config,
	    timestep_spacing="trailing",
	)

	# Load the transformer of the image-to-video checkpoint in bfloat16.
	# NOTE(review): it is not attached to `pipe` or used anywhere in the visible file.
	i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
	    "THUDM/CogVideoX-5b-I2V",
	    subfolder="transformer",
	    torch_dtype=torch.bfloat16,
	)

	def generate_video(prompt, style, duration, image) -> str:
	    """
	    Generate a Pokémon-themed video and return the path of the encoded file.

	    The user's prompt is "flavored" with the chosen style, the requested
	    duration and a fixed sentence asking for iconic Pokémon elements
	    (Ash, Pikachu, Team Rocket), then passed to the module-level ``pipe``.

	    Args:
	        prompt: Scene description entered by the user.
	        style: Animation style name; inserted into the prompt text.
	        duration: Requested length in seconds. It is only mentioned in the
	            prompt text; it is not passed to the pipeline as a frame count.
	        image: Optional image path from the UI. Accepted for interface
	            compatibility but currently ignored.

	    Returns:
	        Path to a temporary ``.mp4`` file containing the generated clip,
	        suitable for a ``gr.Video`` output.
	    """
	    # Imported locally so this function does not depend on edits to the
	    # file's top-level import block.
	    import tempfile

	    from diffusers.utils import export_to_video

	    # Build a full prompt by combining user input with style and Pokémon-specific flavor.
	    full_prompt = (
	        f"{prompt}, in {style} style, lasting {duration} seconds. "
	        "Include iconic Pokémon elements like Ash, Pikachu, and Team Rocket."
	    )

	    # Generate video (adjust inference parameters as needed)
	    result = pipe(full_prompt, num_inference_steps=50, guidance_scale=7.5)

	    # The pipeline output exposes generated clips as `frames` (one entry per
	    # prompt); there is no `videos` attribute, so the previous lookup raised
	    # AttributeError.
	    frames = result.frames[0]

	    # gr.Video needs a file on disk rather than raw frames, so reserve a
	    # temp file name and encode the frames into it. delete=False keeps the
	    # file around after the handle is closed so Gradio can serve it.
	    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
	        video_path = tmp.name
	    # fps=8 follows the CogVideoX usage examples -- TODO confirm for this checkpoint.
	    export_to_video(frames, video_path, fps=8)
	    return video_path

	# Build the Gradio UI.
	# NOTE(review): the original nesting was lost in this copy of the file; the
	# layout below (prompt and style side by side in a row, everything else
	# stacked beneath) is reconstructed from the statement grouping -- confirm.
	with gr.Blocks() as demo:
	    # Page title and a short usage description.
	    gr.Markdown("# 🎥 PokeVidGen AI")
	    gr.Markdown(
	        "Generate Pokémon anime shorts using CogVideoX-5b! Enter your scene prompt, choose an animation style, set the duration, and optionally upload an image."
	    )

	    # Scene prompt and animation style.
	    with gr.Row():
	        prompt_input = gr.Textbox(
	            label="Enter Pokémon Scene",
	            placeholder="Ash battles Team Rocket with Pikachu's Thunderbolt",
	        )
	        style_input = gr.Dropdown(
	            choices=["Anime Classic", "Modern 3D", "Cartoon"],
	            label="Animation Style",
	            value="Anime Classic",
	        )

	    # Remaining inputs, the trigger button and the output player.
	    duration_input = gr.Slider(
	        minimum=1,
	        maximum=10,
	        step=1,
	        label="Duration (seconds)",
	        value=5,
	    )
	    image_input = gr.Image(label="Optional Image", type="filepath")
	    generate_button = gr.Button("Generate Video")
	    video_output = gr.Video(label="Generated Video")

	    # Clicking the button sends the four inputs to generate_video and shows
	    # its return value in the video component.
	    generate_button.click(
	        fn=generate_video,
	        inputs=[prompt_input, style_input, duration_input, image_input],
	        outputs=video_output,
	    )

	demo.launch()