# Spaces: jarondon82 / hack
# Status: Sleeping
# File size: 26,271 Bytes

import os
import streamlit as st
import requests
from PIL import Image
from io import BytesIO
import replicate
from dotenv import load_dotenv
import json
import time

# Page configuration must be the first Streamlit command
st.set_page_config(page_title="CuentAI – AI Story Maker", layout="wide")

# Load environment variables (python-dotenv) before the tokens below are read
load_dotenv()

# API configuration: each value is None when its environment variable is unset
replicate_token = os.getenv("REPLICATE_API_TOKEN")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Helpers that talk to OpenAI directly through requests instead of the SDK
def openai_chat_completion(prompt, model="gpt-3.5-turbo", temperature=0.8, max_tokens=600):
    """Send a single-message chat completion request to the OpenAI REST API.

    The request is issued with ``requests`` rather than the OpenAI SDK and is
    authorised with the module-level ``openai_api_key``.

    Args:
        prompt: Text sent as the only ``user`` message.
        model: Model identifier placed in the request body.
        temperature: Sampling temperature; coerced with ``float``.
        max_tokens: Completion token limit; coerced with ``int``.

    Returns:
        The ``content`` of the first choice in the response, or ``None`` when
        the API answers with a non-200 status or any exception is raised
        while sending the request or reading the response. In both failure
        cases an ``st.error`` message is shown.
    """
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}"
    }
    request_body = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": float(temperature),
        "max_tokens": int(max_tokens)
    }

    try:
        with st.spinner("Generando historia personalizada..."):
            # Generous timeout so slow completions are not reported as errors
            reply = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=auth_headers,
                json=request_body,
                timeout=60
            )

            if reply.status_code == 200:
                return reply.json()["choices"][0]["message"]["content"]

            # Non-200: show a discreet message without technical detail
            st.error("No se pudo generar la historia. Intenta de nuevo más tarde.")
            return None
    except Exception as exc:
        st.error(f"Error con la API de OpenAI: {str(exc)}")
        return None

def openai_generate_image(prompt, size="1024x1024", quality="standard"):
    """Request one DALL-E 3 image from the OpenAI REST API via ``requests``.

    Args:
        prompt: Text prompt describing the image.
        size: Value sent as the ``size`` field of the request.
        quality: Value sent as the ``quality`` field of the request.

    Returns:
        The URL of the first generated image. When the API answers with a
        non-200 status, or any exception is raised, an ``st.error`` message
        is shown and a placeholder image URL is returned instead.
    """
    placeholder_url = "https://via.placeholder.com/512x512.png?text=Image+Not+Available"

    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}"
    }
    request_body = {
        "model": "dall-e-3",
        "prompt": prompt,
        "n": 1,
        "size": size,
        "quality": quality
    }

    try:
        with st.spinner("Creating magical illustration..."):
            # Extended timeout: image generation takes longer than text
            reply = requests.post(
                "https://api.openai.com/v1/images/generations",
                headers=auth_headers,
                json=request_body,
                timeout=90
            )

            if reply.status_code == 200:
                return reply.json()["data"][0]["url"]

            st.error(f"Could not generate the illustration. Using placeholder image. Error: {reply.text}")
            return placeholder_url
    except Exception as exc:
        st.error(f"There was a problem creating the illustration: {str(exc)}")
        return placeholder_url

# Optional TTS setup
# Uncomment if using Google Cloud TTS
# from google.cloud import texttospeech

# Prompt template loader
def load_prompt(file_path, default_prompt=""):
    """Read a prompt template from disk, falling back to ``default_prompt``.

    Args:
        file_path: Path of the UTF-8 text file to read.
        default_prompt: Value returned when the file cannot be read.

    Returns:
        The file contents with surrounding whitespace stripped, or
        ``default_prompt`` if opening or reading the file raises. A Streamlit
        warning is shown only when the fallback is empty, i.e. when the
        caller supplied no usable default.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            contents = handle.read()
    except Exception:
        if default_prompt:
            # A usable default exists, so stay silent
            return default_prompt
        st.warning(f"No se pudo cargar el archivo {file_path}. Usando prompt predeterminado.")
        return default_prompt

    return contents.strip()

# Story generation function
def generate_story(name: str, theme: str) -> str:
    """
    Ask the chat model for a 300-400 word children's story in English.

    The prompt is built from ``prompts/story_prompt.txt`` when that file can
    be loaded, otherwise from a built-in template. The text is requested
    through ``openai_chat_completion`` with that helper's default model.

    Args:
        name: Name of the protagonist, substituted for ``{name}``.
        theme: Story theme, substituted for ``{theme}``.

    Returns:
        The generated story with surrounding whitespace stripped. If no story
        could be generated, an ``st.error`` message is shown and a canned
        fallback story mentioning ``name`` and ``theme`` is returned instead.
    """
    # Built-in prompt used when the external file is missing or unusable
    default_template = (
        "You are a children's story author. "
        "Write a 300-400 word children's story where the protagonist is named {name} "
        "and the plot is about {theme}. Use a friendly style and simple dialogue. "
        "Include a clear beginning, middle, and end."
    )

    # Try the external file first, with the built-in template as the default
    prompt_template = load_prompt("prompts/story_prompt.txt", default_template)

    try:
        prompt = prompt_template.format(name=name, theme=theme)
    except (KeyError, IndexError, ValueError):
        # The external template contains stray braces or placeholders other
        # than {name}/{theme}; the built-in template is known to format cleanly.
        prompt = default_template.format(name=name, theme=theme)

    try:
        # Custom helper that talks to the API directly
        story = openai_chat_completion(prompt)
        if story:
            return story.strip()
        else:
            raise Exception("No se pudo generar la historia")
    except Exception as e:
        st.error(f"Error generating story: {e}")
        # Return a fallback story for demo purposes
        return f"""
        # The Great Discovery of {name}

        Once upon a time, there was a child named {name} who dreamed about {theme}.
        
        On a sunny day, {name} decided to explore the garden of their house. Among the flowers and trees, 
        they found a small door that had never been seen before.
        
        "What could this be?" {name} wondered curiously.
        
        Upon opening the door, they discovered a magical world full of bright colors and fantastic creatures.
        
        "Welcome!" said a talking butterfly. "We've been waiting for you."
        
        {name} spent the entire day meeting new friends and learning about the importance of caring for nature.
        
        When they returned home, they promised to come back soon and share their adventures with all their friends.
        
        The End.
        """

# Scene segmentation
def split_into_scenes(text: str, num_scenes: int = 3) -> list[str]:
    """
    Split a story into ``num_scenes`` consecutive scenes without losing text.

    Non-blank lines are treated as paragraphs. When there are at least
    ``num_scenes`` paragraphs, scenes are runs of whole paragraphs joined by
    newlines; otherwise the text is split on whitespace and scenes are runs
    of words joined by single spaces. In both modes every scene but the last
    gets ``len(units) // num_scenes`` units and the last scene takes all
    remaining units, so no paragraph or word is dropped.

    Args:
        text: The full story text.
        num_scenes: Number of scenes to produce.

    Returns:
        A list of exactly ``num_scenes`` strings (some may be empty when the
        text has fewer words than scenes), or an empty list when
        ``num_scenes`` is less than 1.
    """
    if num_scenes < 1:
        # Nothing sensible to produce; also avoids dividing by zero below
        return []

    paragraphs = [line for line in text.split("\n") if line.strip()]
    if len(paragraphs) >= num_scenes:
        units, joiner = paragraphs, "\n"
    else:
        # Not enough paragraphs: fall back to splitting by word count
        units, joiner = text.split(), " "

    per_scene = len(units) // num_scenes
    scenes = []
    for index in range(num_scenes):
        start = index * per_scene
        # The last scene absorbs the remainder left by the integer division
        end = start + per_scene if index < num_scenes - 1 else len(units)
        scenes.append(joiner.join(units[start:end]))
    return scenes

# Image generation with DALL-E 3

# Style definitions with strong visual differences. Each "prompt" is a plain
# str.format template with {protagonist} and {scene_description} placeholders;
# both are substituted in a single pass so user-supplied text is never
# re-parsed as a format string.
_DALLE_STYLE_TEMPLATES = {
    "Disney/Pixar": {
        "description": "3D Pixar animation style",
        "prompt": "Create a stunning Pixar/Disney 3D animation style illustration. The scene shows {protagonist} {scene_description}. Render in the exact distinctive style of Pixar's 'Coco' or 'Soul' with volumetric lighting, subsurface scattering on skin, and detailed texturing. Use vibrant colors, expressive character designs with large eyes, and cinematic composition. The image should have depth of field, strong emotional expressions, and a polished, rendered feel."
    },

    "Watercolor": {
        "description": "Delicate watercolor painting",
        "prompt": "Create a delicate watercolor illustration of {protagonist} {scene_description}. Use soft transparent layers with visible paper texture and bleeding colors. The style should resemble classic Beatrix Potter or Maurice Sendak watercolors with gentle brush strokes, subtle color washes, and minimal line work. Include soft edges, color gradients, and the distinctive bleeding effect of watercolor on paper. The palette should use pastel tones with occasional vivid accents."
    },

    "Comic Book": {
        "description": "Bold comic book art",
        "prompt": "Create a dynamic comic book style illustration showing {protagonist} {scene_description}. Use the distinctive style of modern comic books with bold black outlines, flat color fills, dramatic perspectives, and action lines. Include comic-specific elements like dramatic shadows, exaggerated expressions, and dynamic poses. The colors should be vibrant with strong contrasts, reminiscent of Marvel or DC comic art with cel-shading techniques."
    },

    "Claymation": {
        "description": "Handcrafted clay animation",
        "prompt": "Create a claymation/stop-motion style illustration of {protagonist} {scene_description}. The image should look exactly like a photograph of handcrafted clay figures in a miniature set, similar to Aardman's 'Wallace and Gromit' or Laika's work. Show visible fingerprint textures in the clay, slightly imperfect proportions, and the distinctive matte finish of modeling clay. Include miniature handcrafted props, visible set construction, and the characteristic charm of stop-motion."
    },

    "Anime": {
        "description": "Japanese anime style",
        "prompt": "Create a Japanese anime-style illustration showing {protagonist} {scene_description}. Use the distinctive anime aesthetic with large expressive eyes, simplified facial features, and stylized colorful hair. The style should feature cel-shaded colors, speed lines for movement, and exaggerated emotional expressions. Include anime-specific visual elements like dramatic lighting effects, simplified backgrounds with depth, and the clean linework characteristic of Studio Ghibli or modern anime productions."
    },

    "Storybook": {
        "description": "Classic children's book illustration",
        "prompt": "Create a classic children's storybook illustration showing {protagonist} {scene_description}. The style should resemble vintage children's books with intricate hand-drawn details, rich textures, and a warm, nostalgic quality. Use the distinctive illustration style of classic illustrators like E.H. Shepard (Winnie the Pooh) or Quentin Blake (Roald Dahl books) with detailed linework, subtle watercolor washes, and charming character designs. Include fine pen details, crosshatching, and the distinctive page-like quality of traditional book illustrations."
    }
}

# Instructions appended to every prompt to keep text out of the image
_DALLE_NO_TEXT_REQUIREMENTS = (
    "\n\nCRITICAL REQUIREMENTS:\n"
    "1. The image MUST NOT contain ANY text, words, letters, numbers, or writing of any kind.\n"
    "2. Do not include speech bubbles, captions, labels, signs, or any other textual elements.\n"
    "3. Focus exclusively on illustrating the visual scene without attempting to include any written language.\n"
    "4. The illustration should communicate entirely through visual means only.\n\n"
    "Create a complete, finished illustration with a clear foreground and background."
)


def generate_image_dalle(prompt: str, protagonist: str, style: str = "Disney/Pixar") -> str:
    """
    Create one 1024x1024 HD image for a scene through ``openai_generate_image``.

    Args:
        prompt: Description of the scene to illustrate.
        protagonist: Name of the main character; inserted verbatim, so it may
            safely contain ``{`` or ``}``.
        style: Key of the visual style to use. Unknown styles fall back to
            "Disney/Pixar".

    Returns:
        The image URL returned by ``openai_generate_image``, or a placeholder
        image URL (after showing ``st.error``) when that call raises or
        returns nothing.
    """
    placeholder_url = "https://via.placeholder.com/512x512.png?text=Image+Not+Available"

    style_info = _DALLE_STYLE_TEMPLATES.get(style, _DALLE_STYLE_TEMPLATES["Disney/Pixar"])

    # Substituted values are not re-scanned by str.format, so braces inside
    # the protagonist name or scene text cannot raise here.
    base_prompt = style_info["prompt"].format(
        protagonist=protagonist,
        scene_description=prompt,
    )
    full_prompt = base_prompt + _DALLE_NO_TEXT_REQUIREMENTS

    try:
        # Custom helper that talks to the API directly; "hd" for better quality
        image_url = openai_generate_image(full_prompt, size="1024x1024", quality="hd")
    except Exception:
        image_url = None

    if image_url:
        return image_url

    st.error("Could not generate the illustration")
    return placeholder_url

# Optional: Image generation with Replicate (Stable Diffusion)
def generate_image_replicate(prompt: str, protagonist: str) -> str:
    """
    Generate a scene illustration with an SDXL model hosted on Replicate.

    The prompt template is read from ``prompts/image_prompt.txt``; a built-in
    template is used when that file yields nothing.

    Args:
        prompt: Description of the scene, substituted for
            ``{scene_description}``.
        protagonist: Character name, substituted for ``{protagonist_name}``.

    Returns:
        The first item of the model output when it is a non-empty list.
        Otherwise a placeholder image URL: one for a missing API token (with
        ``st.warning``) and another for a failed or empty run (with
        ``st.error``).
    """
    if not replicate_token:
        st.warning("Replicate API token not set. Using fallback image.")
        return "https://via.placeholder.com/512x512.png?text=Replicate+API+Token+Missing"

    # External template first, built-in template when it comes back empty
    template = load_prompt("prompts/image_prompt.txt") or (
        "Crea una ilustración de estilo infantil y colorido para un cuento para niños. La escena muestra: {scene_description} Con {protagonist_name} como personaje principal."
    )

    full_prompt = template.format(
        scene_description=prompt,
        protagonist_name=protagonist
    )

    try:
        replicate_client = replicate.Client(api_token=replicate_token)
        result = replicate_client.run(
            "stability-ai/sdxl:2b017d9b67edd2ee1401238df49d75da53c523f36e363881e057f5dc3ed3c5b2",
            input={"prompt": full_prompt}
        )
        # A truthy list is necessarily non-empty
        if not (result and isinstance(result, list)):
            raise Exception("No output from Replicate API")
        return result[0]
    except Exception as exc:
        st.error(f"Error generating image with Replicate: {exc}")
        return "https://via.placeholder.com/512x512.png?text=Replicate+Image+Failed"

# Optional Audio TTS function
def generate_audio_tts(text: str, filename="narration.mp3") -> "str | None":
    """
    Generate an MP3 narration of ``text`` with Google Cloud Text-to-Speech.

    The library is imported lazily so the app still runs when it is not
    installed. Speech is requested with an ``en-US`` female voice and the
    returned audio bytes are written to ``filename``.

    Args:
        text: Text to narrate.
        filename: Path of the MP3 file to write.

    Returns:
        ``filename`` on success. ``None`` when the library cannot be imported
        (``st.warning`` is shown) or when any other error occurs
        (``st.error`` is shown).
    """
    try:
        from google.cloud import texttospeech

        client = texttospeech.TextToSpeechClient()
        input_text = texttospeech.SynthesisInput(text=text)
        voice = texttospeech.VoiceSelectionParams(
            language_code="en-US",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE
        )
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3
        )

        response = client.synthesize_speech(
            input=input_text,
            voice=voice,
            audio_config=audio_config
        )

        with open(filename, "wb") as out:
            out.write(response.audio_content)

        return filename
    except ImportError:
        st.warning("Google Cloud Text-to-Speech is not installed. Skipping audio generation.")
        return None
    except Exception as e:
        st.error(f"Error generating audio: {e}")
        return None

# Custom CSS for child-friendly interface
def set_custom_css():
    """Inject the app's stylesheet into the page.

    The stylesheet imports the Quicksand web font and restyles headings,
    buttons, tabs, the sidebar, alerts, text inputs and sliders. It also
    defines the ``scene-container`` and ``story-text`` classes. It is emitted
    through ``st.markdown`` with ``unsafe_allow_html=True`` so the ``<style>``
    tag is passed through as raw HTML.
    """
    stylesheet = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Quicksand:wght@400;500;600;700&display=swap');
    
    * {
        font-family: 'Quicksand', sans-serif;
    }
    
    h1, h2, h3 {
        color: #6C63FF;
        font-weight: 700;
    }
    
    h1 {
        font-size: 2.5rem;
        margin-bottom: 0.5rem;
    }
    
    h3 {
        font-size: 1.5rem;
        font-weight: 500;
        opacity: 0.8;
        margin-bottom: 2rem;
    }
    
    .stApp {
        background-color: #F8F9FA;
    }
    
    .stButton>button {
        background-color: #6C63FF;
        color: white;
        font-weight: 600;
        border-radius: 50px;
        padding: 12px 24px;
        border: none;
        transition: all 0.3s ease;
        font-size: 16px;
        box-shadow: 0 4px 6px rgba(108, 99, 255, 0.2);
    }
    
    .stButton>button:hover {
        background-color: #5A52E0;
        transform: translateY(-2px);
        box-shadow: 0 6px 10px rgba(108, 99, 255, 0.3);
    }
    
    .stButton>button:active {
        transform: translateY(0);
        box-shadow: 0 2px 4px rgba(108, 99, 255, 0.2);
    }
    
    /* Colorful tabs for scenes */
    .stTabs [data-baseweb="tab-list"] {
        gap: 8px;
    }
    
    .stTabs [data-baseweb="tab"] {
        background-color: #F0F0FF;
        border-radius: 10px 10px 0 0;
        padding: 8px 16px;
        border: none;
    }
    
    .stTabs [aria-selected="true"] {
        background-color: #6C63FF !important;
        color: white !important;
    }
    
    /* Card-like containers for each scene */
    .scene-container {
        background-color: white;
        padding: 24px;
        border-radius: 16px;
        box-shadow: 0 8px 16px rgba(0,0,0,0.08);
        margin-bottom: 24px;
        border: 1px solid #f0f0f0;
    }
    
    .story-text {
        font-size: 18px;
        line-height: 1.7;
        color: #333;
    }
    
    /* Improve sidebar appearance */
    [data-testid="stSidebar"] > div:first-child {
        background-color: #F9F7FF;
        padding: 2rem 1rem;
    }
    
    /* Error and success messages */
    .element-container div[data-testid="stAlert"] {
        border-radius: 10px;
        padding: 12px;
    }
    
    /* Input fields */
    .stTextInput>div>div>input {
        border-radius: 10px;
        border: 2px solid #E0E0FF;
        padding: 12px 16px;
        font-size: 16px;
    }
    
    .stTextInput>div>div>input:focus {
        border-color: #6C63FF;
        box-shadow: 0 0 0 2px rgba(108, 99, 255, 0.2);
    }
    
    /* Slider styling */
    .stSlider [data-baseweb="slider"] {
        height: 6px;
    }
    
    .stSlider [data-baseweb="thumb"] {
        background-color: #6C63FF;
        border-color: #6C63FF;
        width: 20px;
        height: 20px;
    }
    </style>
    """
    st.markdown(stylesheet, unsafe_allow_html=True)

# Streamlit UI
def main():
    """Render the CuentAI page: sidebar, input form, scene tabs and downloads.

    Layout:
      * Sidebar: description of the app and, once a story exists, an optional
        audio narration toggle.
      * Left column: story inputs, image settings and the generate button.
      * Right column: a sample image, or one tab per scene holding that
        scene's illustration and text.
      * Below the columns: the complete story and a text download button.

    Session state written here: ``story``, ``protagonist``, ``scenes``,
    ``image_generator``, ``image_style``, ``image_url_<i>`` and ``audio_file``.
    """
    # Local import: only needed to escape story text embedded in raw HTML
    import html

    # Apply custom CSS
    set_custom_css()

    # Header
    st.title("🧐‍♂️ CuentAI – AI Story Generator")
    st.markdown("### Create personalized stories with AI-generated images")

    # Sidebar with explanation and info
    with st.sidebar:
        st.subheader("About CuentAI")
        st.write("""
        CuentAI is an application that uses artificial intelligence to create personalized children's stories in English, 
        with automatically generated illustrations for each scene of the story.
        
        **How it works:**
        1. Enter the protagonist's name
        2. Choose a theme for the story
        3. Click on "Generate Story"
        4. Enjoy your personalized story with images!
        """)

        st.subheader("Technologies")
        st.write("""
        - OpenAI GPT-3.5 for generating text
        - DALL-E 3 for creating illustrations
        - Streamlit for the web interface
        """)

        # Optional TTS toggle, offered only once a story exists
        if "story" in st.session_state:
            st.markdown("---")
            st.subheader("🔊 Audio Narration")

            if st.checkbox("Include audio narration"):
                try:
                    # Imported only to check that the optional library is installed
                    from google.cloud import texttospeech  # noqa: F401
                    with st.spinner("Generating audio..."):
                        # Narration is cached per story; the cache is cleared
                        # when a new story is generated.
                        if "audio_file" not in st.session_state:
                            audio_file = generate_audio_tts(st.session_state.story)
                            st.session_state.audio_file = audio_file

                        if st.session_state.audio_file:
                            st.audio(st.session_state.audio_file)
                        else:
                            st.warning("Could not generate audio. Please check your Google Cloud configuration.")
                except ImportError:
                    st.warning("""
                    The narration feature requires Google Cloud Text-to-Speech.
                    
                    To enable this feature:
                    1. Install the library: `pip install google-cloud-texttospeech`
                    2. Configure your Google Cloud credentials
                    """)

    # Main content area with two columns: inputs on left, display on right
    main_cols = st.columns([1, 1])

    # Left column: Input form
    with main_cols[0]:
        st.subheader("Customize your story")

        with st.container():
            protagonist = st.text_input("Protagonist Name", "Alice")
            theme = st.text_input("Story Theme", "exploring a magical jungle")
            num_scenes = st.slider("Number of scenes", min_value=1, max_value=5, value=3)

            # Image generation options
            st.subheader("Image Settings")

            image_generator = st.radio(
                "Image generation engine:",
                options=["DALL-E 3", "Stable Diffusion (Replicate)"],
                index=0,
                horizontal=True
            )

            image_style = st.selectbox(
                "Illustration style:",
                options=["Disney/Pixar", "Watercolor", "Comic Book", "Claymation", "Anime", "Storybook"],
                index=0
            )

            generate_button = st.button("✨ Generate Story", use_container_width=True)

            if generate_button:
                with st.spinner("Writing story with AI..."):
                    story_text = generate_story(protagonist, theme)

                # Drop illustrations and narration cached for the previous
                # story so they are regenerated for the new one.
                stale_keys = [
                    key for key in st.session_state.keys()
                    if isinstance(key, str)
                    and (key.startswith("image_url_") or key == "audio_file")
                ]
                for key in stale_keys:
                    del st.session_state[key]

                st.session_state.story = story_text
                st.session_state.protagonist = protagonist
                st.session_state.scenes = split_into_scenes(story_text, num_scenes=num_scenes)
                # Snapshot the image settings used for this story. Cached
                # images are not regenerated when the widgets change, so the
                # caption must keep describing the settings they were made with.
                st.session_state.image_generator = image_generator
                st.session_state.image_style = image_style

    # Right column: Display area (sample image or generated content)
    with main_cols[1]:
        if "story" not in st.session_state:
            # Show sample image when no story has been generated
            st.image("https://img.freepik.com/free-vector/hand-drawn-fairy-tale-castle_23-2149423879.jpg", 
                    caption="Sample image - Generate your personalized story", 
                    use_column_width=True)
        else:
            scenes = st.session_state.scenes
            story_engine = st.session_state.get("image_generator", image_generator)
            story_style = st.session_state.get("image_style", image_style)

            # Create tabs for scene navigation
            tabs = st.tabs([f"Scene {i+1}" for i in range(len(scenes))])

            # Every tab body runs on every script run and the active tab is
            # not reported back, so each tab carries its own image and text
            # instead of relying on a "selected tab" index.
            for i, (tab, scene) in enumerate(zip(tabs, scenes)):
                with tab:
                    image_key = f"image_url_{i}"

                    # Generate image if not already in session state
                    if image_key not in st.session_state:
                        with st.spinner("Generating illustration..."):
                            # Limit the image prompt to the first 100 words
                            scene_summary = " ".join(scene.split()[:100])

                            if story_engine == "DALL-E 3":
                                img_url = generate_image_dalle(
                                    scene_summary,
                                    st.session_state.protagonist,
                                    story_style
                                )
                            else:
                                img_url = generate_image_replicate(scene_summary, st.session_state.protagonist)

                            st.session_state[image_key] = img_url

                    # Display image
                    st.image(st.session_state[image_key], use_column_width=True)
                    st.caption(f"Illustration for Scene {i+1} in {story_style} style")

                    # Scene text; escaped because it is embedded in raw HTML
                    # and contains user- and model-supplied content.
                    st.markdown(
                        f"<div class='scene-container'><div class='story-text'>{html.escape(scene)}</div></div>",
                        unsafe_allow_html=True
                    )

    # Complete story and download options
    if "story" in st.session_state:
        st.markdown("---")
        st.subheader("📚 Your Personalized Story")

        with st.expander("View complete story"):
            st.markdown(
                f"<div class='story-text'>{html.escape(st.session_state.story)}</div>",
                unsafe_allow_html=True
            )

        st.markdown("---")
        st.subheader("💾 Save your story")
        col1, col2 = st.columns(2)

        with col1:
            st.download_button(
                label="Download story text",
                data=st.session_state.story,
                file_name=f"story_{st.session_state.protagonist.lower().replace(' ', '_')}.txt",
                mime="text/plain"
            )

        # Placeholder: image downloading is not implemented
        with col2:
            st.info("Image downloading will be available in a future version.")
# Run the app only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()