import os

import torch
import gradio as gr
import ffmpeg
from diffusers import StableDiffusionPipeline

# Load Stable Diffusion once at startup. fp16 weights require a CUDA device;
# this will download the checkpoint on first run.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")


def generate_image(prompt, style=None):
    """Generate an AI image from a text prompt and an optional style prefix.

    Args:
        prompt: Text description of the desired image.
        style: Optional style keyword prepended to the prompt (e.g. "Pixel Art").

    Returns:
        A PIL.Image produced by the Stable Diffusion pipeline.
    """
    full_prompt = f"{style} {prompt}" if style else prompt
    return pipe(full_prompt).images[0]


def create_video(images, audio_path, output_path="output_video.mp4", fps=1):
    """Render PIL images into an H.264 video and mux in the given audio file.

    Args:
        images: Sequence of PIL images used as video frames.
        audio_path: Path to the audio file (e.g. an uploaded MP3).
        output_path: Path for the intermediate silent video.
        fps: Frame rate of the generated video.

    Returns:
        Path to the final video file containing both video and audio.
    """
    # Write frames to disk as frame_0.png, frame_1.png, ... so the ffmpeg
    # image2 demuxer can pick them up via the %d pattern.
    frame_paths = []
    for i, img in enumerate(images):
        frame_path = f"frame_{i}.png"
        img.save(frame_path)
        frame_paths.append(frame_path)

    # Build the silent video from the numbered frames.
    (
        ffmpeg
        .input("frame_%d.png", framerate=fps)
        .output(output_path, vcodec="libx264", pix_fmt="yuv420p")
        .overwrite_output()
        .run()
    )

    # BUG FIX: ffmpeg-python streams have no chained `.input()` method — the
    # original `ffmpeg.input(a).input(b)` raised AttributeError. The correct
    # pattern is two independent input streams combined via ffmpeg.output().
    # Video is stream-copied; audio is re-encoded to AAC because raw MP3
    # tracks inside MP4 are poorly supported by many players.
    video_with_audio = "final_output.mp4"
    video_stream = ffmpeg.input(output_path)
    audio_stream = ffmpeg.input(audio_path)
    (
        ffmpeg
        .output(
            video_stream.video,
            audio_stream.audio,
            video_with_audio,
            vcodec="copy",
            acodec="aac",
        )
        .overwrite_output()
        .run()
    )

    # Clean up intermediate frame files; best-effort, never fail the request.
    for frame_path in frame_paths:
        try:
            os.remove(frame_path)
        except OSError:
            pass

    return video_with_audio


def process_input(prompt, style, audio_path):
    """Gradio handler: generate image(s) from the prompt and build the video.

    Args:
        prompt: User text prompt.
        style: Optional style string.
        audio_path: Path of the uploaded audio file. With
            gr.Audio(type="filepath") Gradio passes a plain path string,
            so no `.name` attribute access is needed (the original
            `audio_file.name` assumed a file wrapper object).

    Returns:
        Path to the generated video for the gr.Video output component.
    """
    images = [generate_image(prompt, style)]
    return create_video(images, audio_path)


# Gradio UI
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="Text Prompt"),
        gr.Textbox(label="Style (e.g., Roblox, Pixel Art, Realistic)"),
        # BUG FIX: type="file" is not a valid gr.Audio type in current Gradio;
        # "filepath" delivers the upload as a path string to the handler.
        gr.Audio(label="Upload MP3", type="filepath"),
    ],
    outputs=gr.Video(label="Generated AI Video"),
    title="MP3 & Text to AI Video Generator",
    description="Upload an MP3, enter a text prompt, select a style, and generate an AI video.",
)

iface.launch(share=True)