# app.py — MP3 & text-to-AI-video Gradio app (upload by Peeble, commit f2b4c81, verified)
import os
import torch
import gradio as gr
import ffmpeg
from diffusers import StableDiffusionPipeline
# Load Stable Diffusion for AI image generation.
# Fall back to CPU/float32 when CUDA is unavailable so the app can still start
# (float16 is poorly supported on CPU and .to("cuda") would raise without a GPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=dtype)
pipe = pipe.to(device)
def generate_image(prompt, style=None):
    """Run the Stable Diffusion pipeline and return the first generated image.

    Args:
        prompt: text description of the desired image.
        style: optional style keyword prepended to the prompt.

    Returns:
        The first PIL image produced by the pipeline.
    """
    # Prefix the prompt with the style keyword when one is supplied.
    if style:
        full_prompt = f"{style} {prompt}"
    else:
        full_prompt = prompt
    result = pipe(full_prompt)
    return result.images[0]
def create_video(images, audio_path, output_path="output_video.mp4", fps=1):
    """Render PIL images into an H.264 video and mux in an audio track.

    Args:
        images: sequence of PIL.Image frames, one per video frame.
        audio_path: path to the audio file (e.g. an uploaded MP3).
        output_path: path for the intermediate silent video.
        fps: frame rate of the image sequence.

    Returns:
        Path to the final video with audio ("final_output.mp4").
    """
    frame_paths = []
    try:
        # Write frames to disk so ffmpeg can read them as an image sequence.
        for i, img in enumerate(images):
            frame_path = f"frame_{i}.png"
            img.save(frame_path)
            frame_paths.append(frame_path)
        # Encode the image sequence into a silent H.264 video.
        # overwrite_output() prevents ffmpeg from blocking on an interactive
        # "file exists, overwrite?" prompt on repeated runs.
        (
            ffmpeg
            .input("frame_%d.png", framerate=fps)
            .output(output_path, vcodec="libx264", pix_fmt="yuv420p")
            .overwrite_output()
            .run()
        )
        # Mux video and audio. The original chained .input() on a stream
        # object, which ffmpeg-python does not support for combining inputs;
        # separate input streams must be passed to ffmpeg.output() together.
        video_with_audio = "final_output.mp4"
        video_stream = ffmpeg.input(output_path)
        audio_stream = ffmpeg.input(audio_path)
        (
            ffmpeg
            .output(
                video_stream.video,
                audio_stream.audio,
                video_with_audio,
                vcodec="copy",      # video is already encoded; don't re-encode
                acodec="aac",       # transcode MP3 audio to AAC for MP4 compatibility
                shortest=None,      # stop at the shorter of video/audio
            )
            .overwrite_output()
            .run()
        )
        return video_with_audio
    finally:
        # Remove the temporary frame files whether or not encoding succeeded.
        for frame_path in frame_paths:
            if os.path.exists(frame_path):
                os.remove(frame_path)
def process_input(prompt, style, audio_file):
    """Handle user input, generate an image, and render the AI video.

    Args:
        prompt: text description of the desired image.
        style: optional style keyword prepended to the prompt.
        audio_file: uploaded audio — either a filepath string
            (Gradio ``type="filepath"``) or a tempfile-like object
            exposing ``.name`` (older Gradio ``type="file"``).

    Returns:
        Path to the generated video file.

    Raises:
        ValueError: if no audio file was uploaded.
    """
    if audio_file is None:
        # Original code raised AttributeError on a missing upload; fail clearly.
        raise ValueError("An audio file is required to generate the video.")
    # Accept both the plain-path and the file-object form, so this works
    # regardless of which Gradio Audio `type` the UI is configured with.
    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
    images = [generate_image(prompt, style)]
    return create_video(images, audio_path)
# Gradio UI: text prompt + style + MP3 upload in, generated video out.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="Text Prompt"),
        gr.Textbox(label="Style (e.g., Roblox, Pixel Art, Realistic)"),
        # NOTE(review): type="file" was removed in Gradio 4.x — gr.Audio now
        # accepts only "filepath" or "numpy". Verify the pinned Gradio version;
        # with "filepath" the callback receives a path string, not a file object.
        gr.Audio(label="Upload MP3", type="file")
    ],
    outputs=gr.Video(label="Generated AI Video"),
    title="MP3 & Text to AI Video Generator",
    description="Upload an MP3, enter a text prompt, select a style, and generate an AI video."
)
# share=True also exposes a temporary public Gradio link in addition to the local server.
iface.launch(share=True)