Spaces:
Sleeping
Sleeping
File size: 26,271 Bytes
0804bea a772fdc e73d9d5 0804bea e73d9d5 0804bea e73d9d5 0804bea a772fdc bbcd461 a772fdc bbcd461 a772fdc bbcd461 a772fdc 4b03444 a772fdc 4b03444 a772fdc 4b03444 bbcd461 4b03444 bbcd461 4b03444 bbcd461 a772fdc 4b03444 a772fdc 0804bea 0e508ae 0804bea 0e508ae 0804bea e73d9d5 0804bea a772fdc 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 e73d9d5 4b03444 0804bea 4b03444 0804bea 4b03444 a772fdc 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea bbcd461 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea 4b03444 0804bea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 |
import os
import streamlit as st
import requests
from PIL import Image
from io import BytesIO
import replicate
from dotenv import load_dotenv
import json
import time
# Page configuration must be the first Streamlit command
st.set_page_config(page_title="CuentAI – AI Story Maker", layout="wide")
# Load environment variables via python-dotenv before reading the credentials below
load_dotenv()
# API credentials read from the environment; os.getenv returns None when a variable is unset
replicate_token = os.getenv("REPLICATE_API_TOKEN")
openai_api_key = os.getenv("OPENAI_API_KEY")
# Helpers that talk to OpenAI over plain HTTP (requests) instead of using the SDK
def openai_chat_completion(prompt, model="gpt-3.5-turbo", temperature=0.8, max_tokens=600) -> "str | None":
    """Request a single chat completion from the OpenAI REST API.

    The call goes through ``requests`` rather than the OpenAI SDK.

    Args:
        prompt: Text sent as the only ``user`` message.
        model: Chat model identifier.
        temperature: Sampling temperature; coerced to ``float``.
        max_tokens: Completion token limit; coerced to ``int``.

    Returns:
        The content of the first choice, or ``None`` when the API key is
        missing, the request fails, the status is not 200, or the response
        body does not have the expected shape. Every failure is reported to
        the user through ``st.error``.
    """
    # Fail fast instead of sending "Bearer None" and surfacing a vague error.
    if not openai_api_key:
        st.error("OpenAI API key not set. Cannot generate the story.")
        return None

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": float(temperature),
        "max_tokens": int(max_tokens),
    }
    try:
        with st.spinner("Generando historia personalizada..."):
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=60,  # generous timeout: completions can be slow
            )
        # Non-200: show a generic message without technical detail.
        if response.status_code != 200:
            st.error("No se pudo generar la historia. Intenta de nuevo más tarde.")
            return None
        return response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # RequestException: network/timeout problems.
        # ValueError: body is not valid JSON.
        # KeyError/IndexError/TypeError: JSON does not have the expected shape.
        st.error(f"Error con la API de OpenAI: {str(e)}")
        return None
def openai_generate_image(prompt, size="1024x1024", quality="standard"):
    """Generate one image with DALL-E 3 through the OpenAI REST API.

    Args:
        prompt: Full text prompt describing the image.
        size: Image size string sent to the API.
        quality: Quality setting sent to the API.

    Returns:
        The URL of the generated image. When the API key is missing, the
        request fails, the status is not 200, or the response body does not
        have the expected shape, an error is shown with ``st.error`` and a
        placeholder image URL is returned instead, so the result is always a
        non-empty string.
    """
    placeholder_url = "https://via.placeholder.com/512x512.png?text=Image+Not+Available"

    # Fail fast instead of sending "Bearer None" to the API.
    if not openai_api_key:
        st.error("OpenAI API key not set. Using placeholder image.")
        return placeholder_url

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }
    payload = {
        "model": "dall-e-3",
        "prompt": prompt,
        "n": 1,
        "size": size,
        "quality": quality,
    }
    try:
        with st.spinner("Creating magical illustration..."):
            response = requests.post(
                "https://api.openai.com/v1/images/generations",
                headers=headers,
                json=payload,
                timeout=90,  # extended timeout for high quality images
            )
        if response.status_code != 200:
            st.error(f"Could not generate the illustration. Using placeholder image. Error: {response.text}")
            return placeholder_url
        return response.json()["data"][0]["url"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # Network failure, non-JSON body, or a JSON body with an unexpected shape.
        st.error(f"There was a problem creating the illustration: {str(e)}")
        return placeholder_url
# Optional TTS: google-cloud-texttospeech is imported lazily inside
# generate_audio_tts(), so no module-level import is required here.
# Function to load prompt templates
def load_prompt(file_path: str, default_prompt: str = "") -> str:
    """Read a prompt template from disk, falling back to ``default_prompt``.

    Args:
        file_path: Path of the UTF-8 text file to read.
        default_prompt: Value returned when the file cannot be read.

    Returns:
        The file content with surrounding whitespace stripped, or
        ``default_prompt`` when the file is missing, unreadable, or not valid
        UTF-8. A Streamlit warning is shown only when no default was given,
        because in that case the caller receives an empty string.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read().strip()
    except (OSError, ValueError):
        # OSError: missing or unreadable file.
        # ValueError: undecodable bytes (UnicodeDecodeError) or an invalid path.
        # Anything else is a programming error and is allowed to propagate.
        if not default_prompt:
            st.warning(f"No se pudo cargar el archivo {file_path}. Usando prompt predeterminado.")
        return default_prompt
# Story generation function
def generate_story(name: str, theme: str) -> str:
    """Write a short children's story about ``name`` and ``theme``.

    The prompt template is loaded from ``prompts/story_prompt.txt`` with a
    built-in English template as fallback, filled with the user inputs and
    sent to ``openai_chat_completion`` using that function's default model.

    Args:
        name: Protagonist name inserted into the prompt.
        theme: Plot theme inserted into the prompt.

    Returns:
        The generated story with surrounding whitespace stripped. When no
        usable story is produced, an error is shown and a canned demo story
        mentioning ``name`` and ``theme`` is returned, so the result is never
        empty.
    """
    # Built-in template used when the external file is missing or malformed.
    default_template = (
        "You are a children's story author. "
        "Write a 300-400 word children's story where the protagonist is named {name} "
        "and the plot is about {theme}. Use a friendly style and simple dialogue. "
        "Include a clear beginning, middle, and end."
    )
    prompt_template = load_prompt("prompts/story_prompt.txt", default_template)

    try:
        prompt = prompt_template.format(name=name, theme=theme)
    except (KeyError, IndexError, ValueError):
        # The external template has stray braces or unknown placeholders;
        # the built-in template is known to format cleanly.
        prompt = default_template.format(name=name, theme=theme)

    story = None
    try:
        story = openai_chat_completion(prompt)
    except Exception as e:
        # UI boundary: a backend failure must not break the page.
        st.error(f"Error generating story: {e}")
    else:
        # Treat a missing or whitespace-only completion as a failure.
        if not (story and story.strip()):
            story = None
            st.error("Error generating story: No se pudo generar la historia")

    if story:
        return story.strip()

    # Fallback story for demo purposes.
    return f"""
# The Great Discovery of {name}
Once upon a time, there was a child named {name} who dreamed about {theme}.
On a sunny day, {name} decided to explore the garden of their house. Among the flowers and trees,
they found a small door that had never been seen before.
"What could this be?" {name} wondered curiously.
Upon opening the door, they discovered a magical world full of bright colors and fantastic creatures.
"Welcome!" said a talking butterfly. "We've been waiting for you."
{name} spent the entire day meeting new friends and learning about the importance of caring for nature.
When they returned home, they promised to come back soon and share their adventures with all their friends.
The End.
"""
# Scene segmentation
def _partition_evenly(items: list[str], parts: int) -> list[list[str]]:
    """Split ``items`` into ``parts`` consecutive groups whose sizes differ by at most one."""
    base, extra = divmod(len(items), parts)
    groups = []
    start = 0
    for index in range(parts):
        # The first ``extra`` groups absorb the remainder, one item each.
        size = base + (1 if index < extra else 0)
        groups.append(items[start:start + size])
        start += size
    return groups


def split_into_scenes(text: str, num_scenes: int = 3) -> list[str]:
    """Split a story into scenes, preserving paragraph structure when possible.

    Non-blank lines are treated as paragraphs. When there are at least
    ``num_scenes`` paragraphs, consecutive paragraphs are grouped and joined
    with newlines. Otherwise the text is split into words and grouped by word
    count, joined with spaces. In both cases the units are spread as evenly
    as possible and none are dropped.

    Args:
        text: The full story.
        num_scenes: Desired number of scenes; must be at least 1.

    Returns:
        A list of scene strings. It has ``num_scenes`` entries when the text
        has at least that many words, fewer when it does not (so no scene is
        empty), and ``[""]`` for text with no words so callers can always
        index the first scene.

    Raises:
        ValueError: If ``num_scenes`` is less than 1.
    """
    if num_scenes < 1:
        raise ValueError("num_scenes must be at least 1")

    paragraphs = [p for p in text.split("\n") if p.strip()]
    if len(paragraphs) >= num_scenes:
        units, separator = paragraphs, "\n"
    else:
        # Not enough paragraphs: fall back to splitting by word count.
        units, separator = text.split(), " "

    if not units:
        return [""]

    parts = min(num_scenes, len(units))
    return [separator.join(group) for group in _partition_evenly(units, parts)]
# Image generation with DALL-E 3
# Style-specific prompt templates. Each "prompt" is a str.format template with
# two placeholders, {protagonist} and {scene_description}, filled in one pass
# so that braces inside user-provided values are never re-parsed.
_DALLE_STYLE_TEMPLATES = {
    "Disney/Pixar": {
        "description": "3D Pixar animation style",
        "prompt": (
            "Create a stunning Pixar/Disney 3D animation style illustration. "
            "The scene shows {protagonist} {scene_description}. "
            "Render in the exact distinctive style of Pixar's 'Coco' or 'Soul' with volumetric lighting, subsurface scattering on skin, and detailed texturing. "
            "Use vibrant colors, expressive character designs with large eyes, and cinematic composition. "
            "The image should have depth of field, strong emotional expressions, and a polished, rendered feel."
        ),
    },
    "Watercolor": {
        "description": "Delicate watercolor painting",
        "prompt": (
            "Create a delicate watercolor illustration of {protagonist} {scene_description}. "
            "Use soft transparent layers with visible paper texture and bleeding colors. "
            "The style should resemble classic Beatrix Potter or Maurice Sendak watercolors with gentle brush strokes, subtle color washes, and minimal line work. "
            "Include soft edges, color gradients, and the distinctive bleeding effect of watercolor on paper. "
            "The palette should use pastel tones with occasional vivid accents."
        ),
    },
    "Comic Book": {
        "description": "Bold comic book art",
        "prompt": (
            "Create a dynamic comic book style illustration showing {protagonist} {scene_description}. "
            "Use the distinctive style of modern comic books with bold black outlines, flat color fills, dramatic perspectives, and action lines. "
            "Include comic-specific elements like dramatic shadows, exaggerated expressions, and dynamic poses. "
            "The colors should be vibrant with strong contrasts, reminiscent of Marvel or DC comic art with cel-shading techniques."
        ),
    },
    "Claymation": {
        "description": "Handcrafted clay animation",
        "prompt": (
            "Create a claymation/stop-motion style illustration of {protagonist} {scene_description}. "
            "The image should look exactly like a photograph of handcrafted clay figures in a miniature set, similar to Aardman's 'Wallace and Gromit' or Laika's work. "
            "Show visible fingerprint textures in the clay, slightly imperfect proportions, and the distinctive matte finish of modeling clay. "
            "Include miniature handcrafted props, visible set construction, and the characteristic charm of stop-motion."
        ),
    },
    "Anime": {
        "description": "Japanese anime style",
        "prompt": (
            "Create a Japanese anime-style illustration showing {protagonist} {scene_description}. "
            "Use the distinctive anime aesthetic with large expressive eyes, simplified facial features, and stylized colorful hair. "
            "The style should feature cel-shaded colors, speed lines for movement, and exaggerated emotional expressions. "
            "Include anime-specific visual elements like dramatic lighting effects, simplified backgrounds with depth, and the clean linework characteristic of Studio Ghibli or modern anime productions."
        ),
    },
    "Storybook": {
        "description": "Classic children's book illustration",
        "prompt": (
            "Create a classic children's storybook illustration showing {protagonist} {scene_description}. "
            "The style should resemble vintage children's books with intricate hand-drawn details, rich textures, and a warm, nostalgic quality. "
            "Use the distinctive illustration style of classic illustrators like E.H. Shepard (Winnie the Pooh) or Quentin Blake (Roald Dahl books) with detailed linework, subtle watercolor washes, and charming character designs. "
            "Include fine pen details, crosshatching, and the distinctive page-like quality of traditional book illustrations."
        ),
    },
}


def generate_image_dalle(prompt: str, protagonist: str, style: str = "Disney/Pixar") -> str:
    """Create one 1024x1024 illustration for a scene in the requested visual style.

    Args:
        prompt: Scene description to illustrate.
        protagonist: Name of the main character, inserted into the prompt.
        style: Key of the illustration style; unknown values fall back to
            "Disney/Pixar".

    Returns:
        The image URL returned by ``openai_generate_image``, or a placeholder
        image URL when no image could be produced.
    """
    style_info = _DALLE_STYLE_TEMPLATES.get(style, _DALLE_STYLE_TEMPLATES["Disney/Pixar"])

    # Fill both placeholders in a single format() call: the values are inserted
    # verbatim, so a protagonist or scene containing "{" or "}" cannot break
    # the template.
    base_prompt = style_info["prompt"].format(
        protagonist=protagonist,
        scene_description=prompt,
    )

    # Strong anti-text instructions appended to the style prompt.
    full_prompt = (
        f"{base_prompt}\n\nCRITICAL REQUIREMENTS:\n"
        "1. The image MUST NOT contain ANY text, words, letters, numbers, or writing of any kind.\n"
        "2. Do not include speech bubbles, captions, labels, signs, or any other textual elements.\n"
        "3. Focus exclusively on illustrating the visual scene without attempting to include any written language.\n"
        "4. The illustration should communicate entirely through visual means only.\n\n"
        "Create a complete, finished illustration with a clear foreground and background."
    )

    try:
        # "hd" quality is requested for better-looking illustrations.
        image_url = openai_generate_image(full_prompt, size="1024x1024", quality="hd")
    except Exception:
        # UI boundary: an unexpected failure must degrade to the placeholder.
        image_url = None

    if image_url:
        return image_url

    st.error("Could not generate the illustration")
    return "https://via.placeholder.com/512x512.png?text=Image+Not+Available"
# Optional: Image generation with Replicate (Stable Diffusion)
def generate_image_replicate(prompt: str, protagonist: str) -> str:
    """Generate a scene illustration through the Replicate API (SDXL model).

    The prompt template is loaded from ``prompts/image_prompt.txt``; a
    built-in template is used when the file yields nothing or cannot be
    formatted.

    Args:
        prompt: Scene description to illustrate.
        protagonist: Name of the main character.

    Returns:
        The first item of the Replicate output list, or a placeholder image
        URL when the token is missing or generation fails.
    """
    if not replicate_token:
        st.warning("Replicate API token not set. Using fallback image.")
        return "https://via.placeholder.com/512x512.png?text=Replicate+API+Token+Missing"

    default_template = "Crea una ilustración de estilo infantil y colorido para un cuento para niños. La escena muestra: {scene_description} Con {protagonist_name} como personaje principal."
    img_prompt_template = load_prompt("prompts/image_prompt.txt") or default_template

    try:
        full_prompt = img_prompt_template.format(
            scene_description=prompt,
            protagonist_name=protagonist,
        )
    except (KeyError, IndexError, ValueError):
        # The external template has stray braces or unknown placeholders;
        # the built-in template is known to format cleanly.
        full_prompt = default_template.format(
            scene_description=prompt,
            protagonist_name=protagonist,
        )

    try:
        client = replicate.Client(api_token=replicate_token)
        output = client.run(
            "stability-ai/sdxl:2b017d9b67edd2ee1401238df49d75da53c523f36e363881e057f5dc3ed3c5b2",
            input={"prompt": full_prompt},
        )
        if isinstance(output, list) and output:
            return output[0]
        # Routed through the handler below so the user sees one consistent error.
        raise RuntimeError("No output from Replicate API")
    except Exception as e:
        # UI boundary: any client failure degrades to a placeholder image.
        st.error(f"Error generating image with Replicate: {e}")
        return "https://via.placeholder.com/512x512.png?text=Replicate+Image+Failed"
# Optional Audio TTS function
def generate_audio_tts(text: str, filename="narration.mp3") -> "str | None":
    """Synthesize an MP3 narration of ``text`` with Google Cloud Text-to-Speech.

    The Google client library is imported lazily so the app still runs when
    it is not installed.

    Args:
        text: The text to narrate.
        filename: Path of the MP3 file to write.

    Returns:
        ``filename`` once the audio has been written, or ``None`` when the
        library is not installed (a warning is shown) or synthesis fails (an
        error is shown).
    """
    try:
        from google.cloud import texttospeech

        client = texttospeech.TextToSpeechClient()
        input_text = texttospeech.SynthesisInput(text=text)
        voice = texttospeech.VoiceSelectionParams(
            language_code="en-US",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        )
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
        )
        response = client.synthesize_speech(
            input=input_text,
            voice=voice,
            audio_config=audio_config,
        )
        with open(filename, "wb") as out:
            out.write(response.audio_content)
        return filename
    except ImportError:
        st.warning("Google Cloud Text-to-Speech is not installed. Skipping audio generation.")
        return None
    except Exception as e:
        # UI boundary: credential, network or file errors must not break the page.
        st.error(f"Error generating audio: {e}")
        return None
# Custom CSS for child-friendly interface
def set_custom_css():
    """Inject the app's stylesheet into the page through ``st.markdown``.

    The stylesheet sets the Quicksand font and styles headings, buttons,
    tabs, scene cards, the sidebar, alerts, text inputs and sliders.
    """
    stylesheet = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Quicksand:wght@400;500;600;700&display=swap');
* {
font-family: 'Quicksand', sans-serif;
}
h1, h2, h3 {
color: #6C63FF;
font-weight: 700;
}
h1 {
font-size: 2.5rem;
margin-bottom: 0.5rem;
}
h3 {
font-size: 1.5rem;
font-weight: 500;
opacity: 0.8;
margin-bottom: 2rem;
}
.stApp {
background-color: #F8F9FA;
}
.stButton>button {
background-color: #6C63FF;
color: white;
font-weight: 600;
border-radius: 50px;
padding: 12px 24px;
border: none;
transition: all 0.3s ease;
font-size: 16px;
box-shadow: 0 4px 6px rgba(108, 99, 255, 0.2);
}
.stButton>button:hover {
background-color: #5A52E0;
transform: translateY(-2px);
box-shadow: 0 6px 10px rgba(108, 99, 255, 0.3);
}
.stButton>button:active {
transform: translateY(0);
box-shadow: 0 2px 4px rgba(108, 99, 255, 0.2);
}
/* Colorful tabs for scenes */
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
}
.stTabs [data-baseweb="tab"] {
background-color: #F0F0FF;
border-radius: 10px 10px 0 0;
padding: 8px 16px;
border: none;
}
.stTabs [aria-selected="true"] {
background-color: #6C63FF !important;
color: white !important;
}
/* Card-like containers for each scene */
.scene-container {
background-color: white;
padding: 24px;
border-radius: 16px;
box-shadow: 0 8px 16px rgba(0,0,0,0.08);
margin-bottom: 24px;
border: 1px solid #f0f0f0;
}
.story-text {
font-size: 18px;
line-height: 1.7;
color: #333;
}
/* Improve sidebar appearance */
[data-testid="stSidebar"] > div:first-child {
background-color: #F9F7FF;
padding: 2rem 1rem;
}
/* Error and success messages */
.element-container div[data-testid="stAlert"] {
border-radius: 10px;
padding: 12px;
}
/* Input fields */
.stTextInput>div>div>input {
border-radius: 10px;
border: 2px solid #E0E0FF;
padding: 12px 16px;
font-size: 16px;
}
.stTextInput>div>div>input:focus {
border-color: #6C63FF;
box-shadow: 0 0 0 2px rgba(108, 99, 255, 0.2);
}
/* Slider styling */
.stSlider [data-baseweb="slider"] {
height: 6px;
}
.stSlider [data-baseweb="thumb"] {
background-color: #6C63FF;
border-color: #6C63FF;
width: 20px;
height: 20px;
}
</style>
"""
    # unsafe_allow_html is needed so the <style> element is emitted as HTML.
    st.markdown(stylesheet, unsafe_allow_html=True)
# Streamlit UI
def _story_html(text: str) -> str:
    """Return ``text`` escaped for embedding in HTML, with newlines turned into ``<br>``."""
    import html  # local import: keeps this block independent of the file's import list

    return html.escape(text).replace("\n", "<br>")


def _reset_story_state() -> None:
    """Drop cached per-story artifacts so a new story never shows stale images or audio."""
    for key in list(st.session_state.keys()):
        if str(key).startswith("image_url_"):
            del st.session_state[key]
    if "audio_file" in st.session_state:
        del st.session_state["audio_file"]


def _render_sidebar() -> None:
    """Render the sidebar: app description, technologies and the optional narration toggle."""
    with st.sidebar:
        st.subheader("About CuentAI")
        st.write("""
CuentAI is an application that uses artificial intelligence to create personalized children's stories in English,
with automatically generated illustrations for each scene of the story.
**How it works:**
1. Enter the protagonist's name
2. Choose a theme for the story
3. Click on "Generate Story"
4. Enjoy your personalized story with images!
""")
        st.subheader("Technologies")
        st.write("""
- OpenAI GPT-3.5 for generating text
- DALL-E 3 for creating illustrations
- Streamlit for the web interface
""")
        # The narration toggle is only offered once a story exists.
        if "story" not in st.session_state:
            return
        st.markdown("---")
        st.subheader("🔊 Audio Narration")
        if not st.checkbox("Include audio narration"):
            return
        try:
            # Imported only to check that the optional dependency is installed.
            from google.cloud import texttospeech  # noqa: F401
        except ImportError:
            st.warning("""
The narration feature requires Google Cloud Text-to-Speech.
To enable this feature:
1. Install the library: `pip install google-cloud-texttospeech`
2. Configure your Google Cloud credentials
""")
            return
        with st.spinner("Generating audio..."):
            # Cached in session state so the audio is synthesized once per story.
            if "audio_file" not in st.session_state:
                st.session_state.audio_file = generate_audio_tts(st.session_state.story)
            if st.session_state.audio_file:
                st.audio(st.session_state.audio_file)
            else:
                st.warning("Could not generate audio. Please check your Google Cloud configuration.")


def _render_story_form() -> None:
    """Render the input form and, when the button is pressed, generate and store a new story."""
    st.subheader("Customize your story")
    with st.container():
        protagonist = st.text_input("Protagonist Name", "Alice")
        theme = st.text_input("Story Theme", "exploring a magical jungle")
        num_scenes = st.slider("Number of scenes", min_value=1, max_value=5, value=3)
        st.subheader("Image Settings")
        image_generator = st.radio(
            "Image generation engine:",
            options=["DALL-E 3", "Stable Diffusion (Replicate)"],
            index=0,
            horizontal=True,
        )
        image_style = st.selectbox(
            "Illustration style:",
            options=["Disney/Pixar", "Watercolor", "Comic Book", "Claymation", "Anime", "Storybook"],
            index=0,
        )
        generate_button = st.button("✨ Generate Story", use_container_width=True)

    if not generate_button:
        return

    with st.spinner("Writing story with AI..."):
        story_text = generate_story(protagonist, theme)
    # Images and audio cached for the previous story must not leak into this one.
    _reset_story_state()
    st.session_state.story = story_text
    st.session_state.protagonist = protagonist
    st.session_state.scenes = split_into_scenes(story_text, num_scenes=num_scenes)
    # Image settings are snapshotted at generation time so the captions always
    # describe the cached images, even if the widgets are changed afterwards.
    st.session_state.image_generator = image_generator
    st.session_state.image_style = image_style


def _render_scenes() -> None:
    """Render one tab per scene with its illustration and text, or a sample image before any story exists."""
    if "story" not in st.session_state:
        st.image(
            "https://img.freepik.com/free-vector/hand-drawn-fairy-tale-castle_23-2149423879.jpg",
            caption="Sample image - Generate your personalized story",
            use_column_width=True,
        )
        return

    scenes = st.session_state.scenes
    tabs = st.tabs([f"Scene {number}" for number in range(1, len(scenes) + 1)])
    # Every tab body runs on each script run, so the code cannot learn which
    # tab is active here. Each scene's text is therefore rendered inside its
    # own tab rather than tracked through a "selected tab" index.
    for i, (tab, scene) in enumerate(zip(tabs, scenes)):
        with tab:
            image_key = f"image_url_{i}"
            if image_key not in st.session_state:
                with st.spinner("Generating illustration..."):
                    # Limit the image prompt to the first 100 words of the scene.
                    scene_summary = " ".join(scene.split()[:100])
                    if st.session_state.image_generator == "DALL-E 3":
                        img_url = generate_image_dalle(
                            scene_summary,
                            st.session_state.protagonist,
                            st.session_state.image_style,
                        )
                    else:
                        img_url = generate_image_replicate(scene_summary, st.session_state.protagonist)
                st.session_state[image_key] = img_url
            st.image(st.session_state[image_key], use_column_width=True)
            st.caption(f"Illustration for Scene {i+1} in {st.session_state.image_style} style")
            st.markdown(
                f"<div class='scene-container'><div class='story-text'>{_story_html(scene)}</div></div>",
                unsafe_allow_html=True,
            )


def _render_story_footer() -> None:
    """Render the complete story and the download options below the two columns."""
    st.markdown("---")
    st.subheader("📚 Your Personalized Story")
    with st.expander("View complete story"):
        # The story is model output shaped by user input, so it is escaped
        # before being embedded in raw HTML.
        st.markdown(
            f"<div class='story-text'>{_story_html(st.session_state.story)}</div>",
            unsafe_allow_html=True,
        )
    st.markdown("---")
    st.subheader("💾 Save your story")
    col1, col2 = st.columns(2)
    with col1:
        st.download_button(
            label="Download story text",
            data=st.session_state.story,
            file_name=f"story_{st.session_state.protagonist.lower().replace(' ', '_')}.txt",
            mime="text/plain",
        )
    # Placeholder: image downloading is not implemented.
    with col2:
        st.info("Image downloading will be available in a future version.")


def main():
    """Build the page: header, sidebar, input form, scene tabs and download section.

    Story, scenes, protagonist, image settings, image URLs and the narration
    file are kept in ``st.session_state``.
    """
    set_custom_css()
    st.title("🧐♂️ CuentAI – AI Story Generator")
    st.markdown("### Create personalized stories with AI-generated images")

    _render_sidebar()

    # Two columns: inputs on the left, generated content on the right.
    main_cols = st.columns([1, 1])
    with main_cols[0]:
        _render_story_form()
    with main_cols[1]:
        _render_scenes()

    if "story" in st.session_state:
        _render_story_footer()


# Run the app
if __name__ == "__main__":
    main()
|