import gradio as gr
import random
import time
from datetime import datetime
import tempfile
import os
from moviepy.editor import ImageClip, concatenate_videoclips
from gradio_client import Client
from PIL import Image
import edge_tts
import asyncio
import warnings
import numpy as np

warnings.filterwarnings('ignore')

# Initialize the Gradio clients for model access
client = Client("stabilityai/stable-diffusion-xl-base-1.0")
arxiv_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

STORY_GENRES = [
    "Science Fiction", "Fantasy", "Mystery", "Romance",
    "Horror", "Adventure", "Historical Fiction", "Comedy"
]

STORY_STRUCTURES = {
    "Three Act": "Setup (Introduction, Inciting Incident) -> Confrontation (Rising Action, Climax) -> Resolution (Falling Action, Conclusion)",
    "Hero's Journey": "Ordinary World -> Call to Adventure -> Trials -> Transformation -> Return",
    "Five Act": "Exposition -> Rising Action -> Climax -> Falling Action -> Resolution",
    "Seven Point": "Hook -> Plot Turn 1 -> Pinch Point 1 -> Midpoint -> Pinch Point 2 -> Plot Turn 2 -> Resolution"
}


async def generate_speech(text, voice="en-US-AriaNeural"):
    """Generate speech from text using edge-tts"""
    try:
        communicate = edge_tts.Communicate(text, voice)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)
        return tmp_path
    except Exception as e:
        print(f"Error in text2speech: {str(e)}")
        raise


def generate_story_prompt(base_prompt, genre, structure):
    """Generate an expanded story prompt based on genre and structure"""
    prompt = f"""Create a {genre} story using this concept: '{base_prompt}'
Follow this structure: {STORY_STRUCTURES[structure]}
Include vivid descriptions and sensory details.
Make it engaging and suitable for visualization.
Keep each scene description clear and detailed enough for image generation.
Limit the story to 5-7 key scenes.
"""
    return prompt


def generate_story(prompt, model_choice):
    """Generate story using specified model"""
    try:
        result = arxiv_client.predict(
            prompt,
            model_choice,
            True,
            api_name="/ask_llm"
        )
        return result
    except Exception as e:
        return f"Error generating story: {str(e)}"


def generate_image_from_text(text_prompt):
    """Generate an image from text description"""
    try:
        result = client.predict(
            text_prompt,
            num_inference_steps=30,
            guidance_scale=7.5,
            width=768,
            height=512,
            api_name="/text2image"
        )
        return result
    except Exception as e:
        return None


def create_video_from_images(image_paths, durations):
    """Create video from a series of images"""
    clips = [ImageClip(img_path).set_duration(dur)
             for img_path, dur in zip(image_paths, durations)]
    final_clip = concatenate_videoclips(clips, method="compose")
    output_path = tempfile.mktemp(suffix=".mp4")
    final_clip.write_videofile(output_path, fps=24)
    return output_path


def process_story(story_text, num_scenes=5):
    """Break story into scenes for visualization"""
    sentences = story_text.split('.')
    scenes = []
    scene_length = max(1, len(sentences) // num_scenes)
    for i in range(0, len(sentences), scene_length):
        scene = '. '.join(sentences[i:i+scene_length]).strip()
        if scene:
            scenes.append(scene)
    return scenes[:num_scenes]


def story_generator_interface(prompt, genre, structure, model_choice, num_scenes, words_per_scene):
    """Main story generation and multimedia creation function"""
    # Generate expanded prompt
    story_prompt = generate_story_prompt(prompt, genre, structure)

    # Generate story
    story = generate_story(story_prompt, model_choice)

    # Process story into scenes (slider values may arrive as floats).
    # Note: words_per_scene is accepted from the UI but not used here yet.
    scenes = process_story(story, int(num_scenes))

    # Generate images for each scene
    image_paths = []
    for scene in scenes:
        image = generate_image_from_text(scene)
        if image is not None:
            temp_path = tempfile.mktemp(suffix=".png")
            # gradio_client usually returns a local file path for image outputs;
            # fall back to treating the result as a raw array otherwise
            if isinstance(image, str):
                Image.open(image).save(temp_path)
            else:
                Image.fromarray(np.asarray(image)).save(temp_path)
            image_paths.append(temp_path)

    # Generate speech narration for the full story
    audio_path = asyncio.run(generate_speech(story))

    # Create video (skip if no images were generated)
    video_path = None
    if image_paths:
        scene_durations = [5.0] * len(image_paths)  # 5 seconds per scene
        video_path = create_video_from_images(image_paths, scene_durations)

    return story, image_paths, audio_path, video_path


# Create Gradio interface
with gr.Blocks(title="AI Story Generator & Visualizer") as demo:
    gr.Markdown("# 🎭 AI Story Generator & Visualizer")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Story Concept",
                placeholder="Enter your story idea...",
                lines=3
            )
            genre_input = gr.Dropdown(
                label="Genre",
                choices=STORY_GENRES,
                value="Fantasy"
            )
            structure_input = gr.Dropdown(
                label="Story Structure",
                choices=list(STORY_STRUCTURES.keys()),
                value="Three Act"
            )
            model_choice = gr.Dropdown(
                label="Model",
                choices=[
                    "mistralai/Mixtral-8x7B-Instruct-v0.1",
                    "mistralai/Mistral-7B-Instruct-v0.2"
                ],
                value="mistralai/Mixtral-8x7B-Instruct-v0.1"
            )
            num_scenes = gr.Slider(
                label="Number of Scenes",
                minimum=3,
                maximum=7,
                value=5,
                step=1
            )
            words_per_scene = gr.Slider(
                label="Words per Scene",
                minimum=20,
                maximum=100,
                value=50,
                step=10
            )
            generate_btn = gr.Button("Generate Story & Media")

    with gr.Row():
        with gr.Column():
            story_output = gr.Textbox(
                label="Generated Story",
                lines=10,
                interactive=False  # gr.Textbox has no readonly argument; use interactive=False
            )
        with gr.Column():
            gallery = gr.Gallery(label="Scene Visualizations")

    with gr.Row():
        audio_output = gr.Audio(label="Story Narration")
        video_output = gr.Video(label="Story Video")

    generate_btn.click(
        fn=story_generator_interface,
        inputs=[prompt_input, genre_input, structure_input, model_choice,
                num_scenes, words_per_scene],
        outputs=[story_output, gallery, audio_output, video_output]
    )

if __name__ == "__main__":
    # launch() has no reload argument; use the `gradio app.py` CLI for auto-reload during development
    demo.launch()