import tempfile

import streamlit as st
import torch
from diffusers import StableDiffusionPipeline
from gtts import gTTS


# Load the Stable Diffusion model once and cache it across Streamlit reruns.
@st.cache_resource
def load_model():
    model_id = "runwayml/stable-diffusion-v1-5"
    # Use the GPU if CUDA is available; otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cuda":
        # float16 halves GPU memory use; it is not supported for CPU inference.
        pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
    else:
        pipe = StableDiffusionPipeline.from_pretrained(model_id)
    pipe.to(device)
    return pipe


pipe = load_model()


# Generate a single image (standing in for one frame of a video).
def generate_video(prompt):
    with torch.no_grad():
        image = pipe(prompt).images[0]
    return image


# Generate the audio narration with gTTS and save it to a temporary MP3 file.
def generate_audio(text, lang):
    tts = gTTS(text=text, lang=lang)
    # Create (and close) the temp file first so gTTS can write to its path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        temp_audio_path = f.name
    tts.save(temp_audio_path)
    return temp_audio_path


# Streamlit UI
st.title("AI Video Generator")
st.write("Enter a prompt, select a language, and generate a video!")

prompt = st.text_area("Enter your video description:")
language = st.selectbox("Select Language for Narration:", ["en", "es", "fr", "de", "zh", "hi"])
generate_btn = st.button("Generate Video")

if generate_btn and prompt:
    st.write("Generating video... Please wait!")

    # Simulate video generation by creating a single frame (image).
    image = generate_video(prompt)

    # Save the frame to disk.
    image_path = "generated_image.png"
    image.save(image_path)

    # Generate the narration.
    audio_path = generate_audio(prompt, language)

    # Display the image and audio together (simulating a video).
    st.image(image_path)
    st.audio(audio_path)

    st.success("Video generation completed!")
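

# --- Optional sketch: muxing the frame and narration into a real MP4 ---
# The app above only simulates a video by showing a still image next to an
# audio player. A minimal sketch of producing an actual video file, assuming
# moviepy 1.x is installed (`pip install moviepy`); the function name and
# output path below are illustrative, not part of the original app.
def combine_into_video(image_path, audio_path, out_path="generated_video.mp4"):
    from moviepy.editor import AudioFileClip, ImageClip

    audio = AudioFileClip(audio_path)
    # Hold the single frame on screen for the full length of the narration.
    clip = ImageClip(image_path).set_duration(audio.duration).set_audio(audio)
    clip.write_videofile(out_path, fps=24)
    return out_path
# In the UI this could replace the st.image/st.audio pair with a single
# st.video(combine_into_video(image_path, audio_path)) call.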