import os

import torch
import gradio as gr
import ffmpeg
from diffusers import StableDiffusionPipeline

# Load Stable Diffusion once at startup. fp16 weights require a CUDA device;
# this will download the checkpoint on first run.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")


def generate_image(prompt, style=None):
    """Generate an AI image from a text prompt and an optional style prefix.

    Args:
        prompt: Text description of the desired image.
        style: Optional style keyword prepended to the prompt (e.g. "Pixel Art").

    Returns:
        A PIL.Image produced by the Stable Diffusion pipeline.
    """
    full_prompt = f"{style} {prompt}" if style else prompt
    return pipe(full_prompt).images[0]


def create_video(images, audio_path, output_path="output_video.mp4", fps=1):
    """Render PIL images into an H.264 video and mux in the given audio file.

    Args:
        images: Sequence of PIL images used as video frames.
        audio_path: Path to the audio file (e.g. an uploaded MP3).
        output_path: Path for the intermediate silent video.
        fps: Frame rate of the generated video.

    Returns:
        Path to the final video file containing both video and audio.
    """
    # Write frames to disk as frame_0.png, frame_1.png, ... so the ffmpeg
    # image2 demuxer can pick them up via the %d pattern.
    frame_paths = []
    for i, img in enumerate(images):
        frame_path = f"frame_{i}.png"
        img.save(frame_path)
        frame_paths.append(frame_path)

    # Build the silent video from the numbered frames.
    (
        ffmpeg
        .input("frame_%d.png", framerate=fps)
        .output(output_path, vcodec="libx264", pix_fmt="yuv420p")
        .overwrite_output()
        .run()
    )

    # BUG FIX: ffmpeg-python streams have no chained `.input()` method — the
    # original `ffmpeg.input(a).input(b)` raised AttributeError. The correct
    # pattern is two independent input streams combined via ffmpeg.output().
    # Video is stream-copied; audio is re-encoded to AAC because raw MP3
    # tracks inside MP4 are poorly supported by many players.
    video_with_audio = "final_output.mp4"
    video_stream = ffmpeg.input(output_path)
    audio_stream = ffmpeg.input(audio_path)
    (
        ffmpeg
        .output(
            video_stream.video,
            audio_stream.audio,
            video_with_audio,
            vcodec="copy",
            acodec="aac",
        )
        .overwrite_output()
        .run()
    )

    # Clean up intermediate frame files; best-effort, never fail the request.
    for frame_path in frame_paths:
        try:
            os.remove(frame_path)
        except OSError:
            pass

    return video_with_audio


def process_input(prompt, style, audio_path):
    """Gradio handler: generate image(s) from the prompt and build the video.

    Args:
        prompt: User text prompt.
        style: Optional style string.
        audio_path: Path of the uploaded audio file. With
            gr.Audio(type="filepath") Gradio passes a plain path string,
            so no `.name` attribute access is needed (the original
            `audio_file.name` assumed a file wrapper object).

    Returns:
        Path to the generated video for the gr.Video output component.
    """
    images = [generate_image(prompt, style)]
    return create_video(images, audio_path)


# Gradio UI
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="Text Prompt"),
        gr.Textbox(label="Style (e.g., Roblox, Pixel Art, Realistic)"),
        # BUG FIX: type="file" is not a valid gr.Audio type in current Gradio;
        # "filepath" delivers the upload as a path string to the handler.
        gr.Audio(label="Upload MP3", type="filepath"),
    ],
    outputs=gr.Video(label="Generated AI Video"),
    title="MP3 & Text to AI Video Generator",
    description="Upload an MP3, enter a text prompt, select a style, and generate an AI video.",
)

iface.launch(share=True)