Spaces:
Runtime error
Runtime error
| import torch | |
| import gradio as gr | |
| from diffusers import CogVideoXPipeline, CogVideoXDPMScheduler, CogVideoXTransformer3DModel | |
| from huggingface_hub import hf_hub_download, snapshot_download | |
| # Set device | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
| # (Optional) Download additional assets for upscaling or interpolation if needed. | |
| hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran") | |
| snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife") | |
| # Load the text-to-video model using diffusers' CogVideoXPipeline. | |
| # (Replace with your model ID if different.) | |
| pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16).to(device) | |
| pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing") | |
| # Optionally load an image-to-video transformer if your pipeline supports image conditioning. | |
| # (This may be used if you want to condition on an uploaded image.) | |
| i2v_transformer = CogVideoXTransformer3DModel.from_pretrained( | |
| "THUDM/CogVideoX-5b-I2V", subfolder="transformer", torch_dtype=torch.bfloat16 | |
| ) | |
| def generate_video(prompt, style, duration, image): | |
| """ | |
| Generate a Pokémon-themed video. | |
| The function "flavors" the text prompt with the chosen style and mentions iconic | |
| Pokémon elements (e.g. Ash, Pikachu, Team Rocket). The duration (in seconds) is passed | |
| to the pipeline (if supported). The 'image' input is optional and may be ignored if the | |
| pipeline does not support image conditioning. | |
| """ | |
| # Build a full prompt by combining user input with style and Pokémon-specific flavor. | |
| full_prompt = ( | |
| f"{prompt}, in {style} style, lasting {duration} seconds. " | |
| "Include iconic Pokémon elements like Ash, Pikachu, and Team Rocket." | |
| ) | |
| # Generate video (adjust inference parameters as needed) | |
| result = pipe(full_prompt, num_inference_steps=50, guidance_scale=7.5) | |
| # Assuming the pipeline returns a dict with a 'videos' key (a list of generated videos) | |
| video = result.videos[0] | |
| return video | |
| # Build the Gradio UI. | |
| with gr.Blocks() as demo: | |
| gr.Markdown("# 🎥 PokeVidGen AI") | |
| gr.Markdown("Generate Pokémon anime shorts using CogVideoX-5b! Enter your scene prompt, choose an animation style, set the duration, and optionally upload an image.") | |
| with gr.Row(): | |
| prompt_input = gr.Textbox(label="Enter Pokémon Scene", placeholder="Ash battles Team Rocket with Pikachu's Thunderbolt") | |
| style_input = gr.Dropdown(choices=["Anime Classic", "Modern 3D", "Cartoon"], label="Animation Style", value="Anime Classic") | |
| duration_input = gr.Slider(minimum=1, maximum=10, step=1, label="Duration (seconds)", value=5) | |
| image_input = gr.Image(label="Optional Image", type="filepath") | |
| generate_button = gr.Button("Generate Video") | |
| video_output = gr.Video(label="Generated Video") | |
| generate_button.click(fn=generate_video, inputs=[prompt_input, style_input, duration_input, image_input], outputs=video_output) | |
| demo.launch() | |