import gradio as gr
import random
import time
from datetime import datetime
import tempfile
import os
from moviepy.editor import ImageClip, concatenate_videoclips
from gradio_client import Client
from PIL import Image
import edge_tts
import asyncio
import warnings
import numpy as np

warnings.filterwarnings('ignore')

# Initialize the Gradio clients for model access
client = Client("stabilityai/stable-diffusion-xl-base-1.0")
arxiv_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

STORY_GENRES = [
    "Science Fiction", "Fantasy", "Mystery", "Romance",
    "Horror", "Adventure", "Historical Fiction", "Comedy"
]

STORY_STRUCTURES = {
    "Three Act": "Setup (Introduction, Inciting Incident) -> Confrontation (Rising Action, Climax) -> Resolution (Falling Action, Conclusion)",
    "Hero's Journey": "Ordinary World -> Call to Adventure -> Trials -> Transformation -> Return",
    "Five Act": "Exposition -> Rising Action -> Climax -> Falling Action -> Resolution",
    "Seven Point": "Hook -> Plot Turn 1 -> Pinch Point 1 -> Midpoint -> Pinch Point 2 -> Plot Turn 2 -> Resolution"
}


async def generate_speech(text, voice="en-US-AriaNeural"):
    """Generate speech from text using edge-tts"""
    try:
        communicate = edge_tts.Communicate(text, voice)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)
        return tmp_path
    except Exception as e:
        print(f"Error in text2speech: {str(e)}")
        raise


def generate_story_prompt(base_prompt, genre, structure):
    """Generate an expanded story prompt based on genre and structure"""
    prompt = f"""Create a {genre} story using this concept: '{base_prompt}'
Follow this structure: {STORY_STRUCTURES[structure]}
Include vivid descriptions and sensory details.
Make it engaging and suitable for visualization.
Keep each scene description clear and detailed enough for image generation.
Limit the story to 5-7 key scenes.
"""
    return prompt


def generate_story(prompt, model_choice):
    """Generate story using specified model"""
    try:
        result = arxiv_client.predict(
            prompt,
            model_choice,
            True,
            api_name="/ask_llm"
        )
        return result
    except Exception as e:
        return f"Error generating story: {str(e)}"


def generate_image_from_text(text_prompt):
    """Generate an image from text description"""
    try:
        result = client.predict(
            text_prompt,
            num_inference_steps=30,
            guidance_scale=7.5,
            width=768,
            height=512,
            api_name="/text2image"
        )
        return result
    except Exception as e:
        return None


def create_video_from_images(image_paths, durations):
    """Create video from a series of images"""
    clips = [ImageClip(img_path).set_duration(dur)
             for img_path, dur in zip(image_paths, durations)]
    final_clip = concatenate_videoclips(clips, method="compose")
    output_path = tempfile.mktemp(suffix=".mp4")
    final_clip.write_videofile(output_path, fps=24)
    return output_path


def process_story(story_text, num_scenes=5):
    """Break story into scenes for visualization"""
    sentences = story_text.split('.')
    scenes = []
    scene_length = max(1, len(sentences) // num_scenes)
    for i in range(0, len(sentences), scene_length):
        scene = '. '.join(sentences[i:i+scene_length]).strip()
        if scene:
            scenes.append(scene)
    return scenes[:num_scenes]


def story_generator_interface(prompt, genre, structure, model_choice, num_scenes, words_per_scene):
    """Main story generation and multimedia creation function"""
    # Generate expanded prompt
    story_prompt = generate_story_prompt(prompt, genre, structure)

    # Generate story
    story = generate_story(story_prompt, model_choice)

    # Process story into scenes (slider values may arrive as floats).
    # Note: words_per_scene is accepted from the UI but not used here yet.
    scenes = process_story(story, int(num_scenes))

    # Generate images for each scene
    image_paths = []
    for scene in scenes:
        image = generate_image_from_text(scene)
        if image is not None:
            temp_path = tempfile.mktemp(suffix=".png")
            # gradio_client usually returns a local file path for image outputs;
            # fall back to treating the result as a raw array otherwise
            if isinstance(image, str):
                Image.open(image).save(temp_path)
            else:
                Image.fromarray(np.asarray(image)).save(temp_path)
            image_paths.append(temp_path)

    # Generate speech narration for the full story
    audio_path = asyncio.run(generate_speech(story))

    # Create video (skip if no images were generated)
    video_path = None
    if image_paths:
        scene_durations = [5.0] * len(image_paths)  # 5 seconds per scene
        video_path = create_video_from_images(image_paths, scene_durations)

    return story, image_paths, audio_path, video_path


# Create Gradio interface
with gr.Blocks(title="AI Story Generator & Visualizer") as demo:
    gr.Markdown("# 🎭 AI Story Generator & Visualizer")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Story Concept",
                placeholder="Enter your story idea...",
                lines=3
            )
            genre_input = gr.Dropdown(
                label="Genre",
                choices=STORY_GENRES,
                value="Fantasy"
            )
            structure_input = gr.Dropdown(
                label="Story Structure",
                choices=list(STORY_STRUCTURES.keys()),
                value="Three Act"
            )
            model_choice = gr.Dropdown(
                label="Model",
                choices=[
                    "mistralai/Mixtral-8x7B-Instruct-v0.1",
                    "mistralai/Mistral-7B-Instruct-v0.2"
                ],
                value="mistralai/Mixtral-8x7B-Instruct-v0.1"
            )
            num_scenes = gr.Slider(
                label="Number of Scenes",
                minimum=3,
                maximum=7,
                value=5,
                step=1
            )
            words_per_scene = gr.Slider(
                label="Words per Scene",
                minimum=20,
                maximum=100,
                value=50,
                step=10
            )
            generate_btn = gr.Button("Generate Story & Media")

    with gr.Row():
        with gr.Column():
            story_output = gr.Textbox(
                label="Generated Story",
                lines=10,
                interactive=False  # gr.Textbox has no readonly argument; use interactive=False
            )
        with gr.Column():
            gallery = gr.Gallery(label="Scene Visualizations")

    with gr.Row():
        audio_output = gr.Audio(label="Story Narration")
        video_output = gr.Video(label="Story Video")

    generate_btn.click(
        fn=story_generator_interface,
        inputs=[prompt_input, genre_input, structure_input, model_choice,
                num_scenes, words_per_scene],
        outputs=[story_output, gallery, audio_output, video_output]
    )

if __name__ == "__main__":
    # launch() has no reload argument; use the `gradio app.py` CLI for auto-reload during development
    demo.launch()