jarondon82 committed on
Commit
4b03444
·
verified ·
1 Parent(s): bbcd461

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +169 -105
app.py CHANGED
@@ -53,8 +53,8 @@ def openai_chat_completion(prompt, model="gpt-3.5-turbo", temperature=0.8, max_t
53
  st.error(f"Error con la API de OpenAI: {str(e)}")
54
  return None
55
 
56
- def openai_generate_image(prompt):
57
- """Genera imágenes con DALL-E 3 directamente a través de requests"""
58
  headers = {
59
  "Content-Type": "application/json",
60
  "Authorization": f"Bearer {openai_api_key}"
@@ -64,28 +64,28 @@ def openai_generate_image(prompt):
64
  "model": "dall-e-3",
65
  "prompt": prompt,
66
  "n": 1,
67
- "size": "1024x1024",
68
- "quality": "standard"
69
  }
70
 
71
  try:
72
- with st.spinner("Creando ilustración mágica..."):
73
  response = requests.post(
74
  "https://api.openai.com/v1/images/generations",
75
  headers=headers,
76
  json=payload,
77
- timeout=60
78
  )
79
 
80
  if response.status_code != 200:
81
- st.error("No se pudo generar la ilustración. Se usará una imagen de muestra.")
82
- return "https://via.placeholder.com/512x512.png?text=Imagen+No+Disponible"
83
 
84
  return response.json()["data"][0]["url"]
85
  except Exception as e:
86
- # Error discreto sin mensajes técnicos
87
- st.error("Ocurrió un problema al crear la ilustración.")
88
- return "https://via.placeholder.com/512x512.png?text=Imagen+No+Disponible"
89
 
90
  # Optional TTS setup
91
  # Uncomment if using Google Cloud TTS
@@ -179,38 +179,73 @@ def split_into_scenes(text: str, num_scenes: int = 3) -> list[str]:
179
  return [" ".join(words[i*chunk_size : (i+1)*chunk_size]) for i in range(num_scenes)]
180
 
181
  # Image generation with DALL-E 3
182
- def generate_image_dalle(prompt: str, protagonist: str) -> str:
183
  """
184
- Call OpenAI Image API to create one 512×512 image from the prompt.
185
  Returns the image URL.
186
  """
187
- # Prompt predeterminado para generación de imágenes
188
- default_template = (
189
- "Create a colorful children's book style illustration showing: {scene_description} "
190
- "With {protagonist_name} as the main character. Use bright colors, simple shapes, "
191
- "and a child-friendly art style."
192
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
- # Intentar cargar el prompt desde archivo con fallback al predeterminado
195
- img_prompt_template = load_prompt("prompts/image_prompt.txt", default_template)
196
 
197
- # Format the prompt with user inputs
198
- full_prompt = img_prompt_template.format(
199
- scene_description=prompt,
200
- protagonist_name=protagonist
201
- )
 
 
 
 
 
202
 
203
  try:
204
- # Usar nuestra función personalizada que se comunica directamente con la API
205
- image_url = openai_generate_image(full_prompt)
 
206
  if image_url:
207
  return image_url
208
  else:
209
- raise Exception("No se pudo generar la imagen")
210
  except Exception as e:
211
- st.error(f"Error generating image: {e}")
212
  # Return a placeholder image URL
213
- return "https://via.placeholder.com/512x512.png?text=Image+Generation+Failed"
214
 
215
  # Optional: Image generation with Replicate (Stable Diffusion)
216
  def generate_image_replicate(prompt: str, protagonist: str) -> str:
@@ -420,7 +455,7 @@ def main():
420
  st.title("🧐‍♂️ CuentAI – AI Story Generator")
421
  st.markdown("### Create personalized stories with AI-generated images")
422
 
423
- # Sidebar with explanation
424
  with st.sidebar:
425
  st.subheader("About CuentAI")
426
  st.write("""
@@ -436,66 +471,114 @@ def main():
436
 
437
  st.subheader("Technologies")
438
  st.write("""
439
- - OpenAI GPT-4 for generating text
440
  - DALL-E 3 for creating illustrations
441
  - Streamlit for the web interface
442
  """)
443
 
444
- # Image generation options
445
- st.subheader("Options")
446
- image_generator = st.radio(
447
- "Image generation engine:",
448
- options=["DALL-E 3", "Stable Diffusion (Replicate)"],
449
- index=0
450
- )
451
- st.session_state.image_generator = image_generator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
 
453
- # Input form
454
- col1, col2 = st.columns(2)
455
 
456
- with col1:
 
457
  st.subheader("Customize your story")
458
- protagonist = st.text_input("Protagonist Name", "Alice")
459
- theme = st.text_input("Story Theme", "explores a magical jungle")
460
- num_scenes = st.slider("Number of scenes", min_value=1, max_value=5, value=3)
461
-
462
- generate_button = st.button("✨ Generate Story")
463
 
464
- if generate_button:
465
- with st.spinner("Writing story with GPT-4..."):
466
- story_text = generate_story(protagonist, theme)
467
- st.session_state.story = story_text
468
- st.session_state.protagonist = protagonist
469
- st.session_state.scenes = split_into_scenes(story_text, num_scenes=num_scenes)
470
-
471
- with col2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  if "story" not in st.session_state:
 
473
  st.image("https://img.freepik.com/free-vector/hand-drawn-fairy-tale-castle_23-2149423879.jpg",
474
  caption="Sample image - Generate your personalized story",
475
  use_column_width=True)
476
-
477
- # Display story and images
478
- if "story" in st.session_state:
479
- st.markdown("---")
480
- st.subheader("📚 Your Personalized Story")
481
-
482
- # Choose one layout: tabs, pagination, or scroll
483
- tabs = st.tabs([f"Scene {i+1}" for i in range(len(st.session_state.scenes))])
484
-
485
- for i, (tab, scene) in enumerate(zip(tabs, st.session_state.scenes)):
486
- with tab:
487
- col1, col2 = st.columns([1, 1])
488
 
489
- with col1:
 
 
 
 
 
 
 
 
490
  # Generate image if not already in session state
491
  if f"image_url_{i}" not in st.session_state:
492
  with st.spinner("Generating illustration..."):
493
- # Get first 100 words for the prompt to avoid token limits
494
- scene_summary = " ".join(scene.split()[:100])
495
 
496
  # Use selected image generator
497
  if st.session_state.image_generator == "DALL-E 3":
498
- img_url = generate_image_dalle(scene_summary, st.session_state.protagonist)
 
 
 
 
499
  else:
500
  img_url = generate_image_replicate(scene_summary, st.session_state.protagonist)
501
 
@@ -503,40 +586,17 @@ def main():
503
 
504
  # Display image
505
  st.image(st.session_state[f"image_url_{i}"], use_column_width=True)
506
- st.caption(f"Illustration generated for Scene {i+1}")
507
-
508
- with col2:
509
- st.markdown(f"<div class='scene-container'><div class='story-text'>{scene}</div></div>", unsafe_allow_html=True)
510
-
511
- # Full story text
512
- with st.expander("View complete story"):
513
- st.markdown(f"<div class='story-text'>{st.session_state.story}</div>", unsafe_allow_html=True)
514
-
515
- # Optional TTS toggle
516
  st.markdown("---")
517
- st.subheader("🔊 Narration")
518
 
519
- if st.checkbox("Include audio narration"):
520
- # Check if TTS is imported
521
- try:
522
- from google.cloud import texttospeech
523
- with st.spinner("Generando audio..."):
524
- if "audio_file" not in st.session_state:
525
- audio_file = generate_audio_tts(st.session_state.story)
526
- st.session_state.audio_file = audio_file
527
-
528
- if st.session_state.audio_file:
529
- st.audio(st.session_state.audio_file)
530
- else:
531
- st.warning("Could not generate audio. Please check your Google Cloud configuration.")
532
- except ImportError:
533
- st.warning("""
534
- The narration feature requires Google Cloud Text-to-Speech.
535
-
536
- To enable this feature:
537
- 1. Install the library: `pip install google-cloud-texttospeech`
538
- 2. Configure your Google Cloud credentials
539
- """)
540
 
541
  # Download options
542
  st.markdown("---")
@@ -554,6 +614,10 @@ def main():
554
  # This is just a placeholder - in a real app you'd need to implement image downloading
555
  with col2:
556
  st.info("Image downloading will be available in a future version.")
 
 
 
 
557
 
558
  # Run the app
559
  if __name__ == "__main__":
 
53
  st.error(f"Error con la API de OpenAI: {str(e)}")
54
  return None
55
 
56
+ def openai_generate_image(prompt, size="1024x1024", quality="standard"):
57
+ """Generate images with DALL-E 3 directly via requests"""
58
  headers = {
59
  "Content-Type": "application/json",
60
  "Authorization": f"Bearer {openai_api_key}"
 
64
  "model": "dall-e-3",
65
  "prompt": prompt,
66
  "n": 1,
67
+ "size": size,
68
+ "quality": quality
69
  }
70
 
71
  try:
72
+ with st.spinner("Creating magical illustration..."):
73
  response = requests.post(
74
  "https://api.openai.com/v1/images/generations",
75
  headers=headers,
76
  json=payload,
77
+ timeout=90 # Extended timeout for high quality images
78
  )
79
 
80
  if response.status_code != 200:
81
+ st.error(f"Could not generate the illustration. Using placeholder image. Error: {response.text}")
82
+ return "https://via.placeholder.com/512x512.png?text=Image+Not+Available"
83
 
84
  return response.json()["data"][0]["url"]
85
  except Exception as e:
86
+ # Show the error (including exception text) to the user and fall back to a placeholder image
87
+ st.error(f"There was a problem creating the illustration: {str(e)}")
88
+ return "https://via.placeholder.com/512x512.png?text=Image+Not+Available"
89
 
90
  # Optional TTS setup
91
  # Uncomment if using Google Cloud TTS
 
179
  return [" ".join(words[i*chunk_size : (i+1)*chunk_size]) for i in range(num_scenes)]
180
 
181
  # Image generation with DALL-E 3
182
+ def generate_image_dalle(prompt: str, protagonist: str, style: str = "Disney/Pixar") -> str:
183
  """
184
+ Call OpenAI Image API to create one 1024x1024 image from the prompt with distinct visual styles.
185
  Returns the image URL.
186
  """
187
+ # Get character gender for proper styling
188
+ character_gender = "gender-neutral" # Default
189
+
190
+ # Comprehensive style definitions with strong visual differences
191
+ style_templates = {
192
+ "Disney/Pixar": {
193
+ "description": "3D Pixar animation style",
194
+ "prompt": f"Create a stunning Pixar/Disney 3D animation style illustration. The scene shows {protagonist} {{scene_description}}. Render in the exact distinctive style of Pixar's 'Coco' or 'Soul' with volumetric lighting, subsurface scattering on skin, and detailed texturing. Use vibrant colors, expressive character designs with large eyes, and cinematic composition. The image should have depth of field, strong emotional expressions, and a polished, rendered feel."
195
+ },
196
+
197
+ "Watercolor": {
198
+ "description": "Delicate watercolor painting",
199
+ "prompt": f"Create a delicate watercolor illustration of {protagonist} {{scene_description}}. Use soft transparent layers with visible paper texture and bleeding colors. The style should resemble classic Beatrix Potter or Maurice Sendak watercolors with gentle brush strokes, subtle color washes, and minimal line work. Include soft edges, color gradients, and the distinctive bleeding effect of watercolor on paper. The palette should use pastel tones with occasional vivid accents."
200
+ },
201
+
202
+ "Comic Book": {
203
+ "description": "Bold comic book art",
204
+ "prompt": f"Create a dynamic comic book style illustration showing {protagonist} {{scene_description}}. Use the distinctive style of modern comic books with bold black outlines, flat color fills, dramatic perspectives, and action lines. Include comic-specific elements like dramatic shadows, exaggerated expressions, and dynamic poses. The colors should be vibrant with strong contrasts, reminiscent of Marvel or DC comic art with cel-shading techniques."
205
+ },
206
+
207
+ "Claymation": {
208
+ "description": "Handcrafted clay animation",
209
+ "prompt": f"Create a claymation/stop-motion style illustration of {protagonist} {{scene_description}}. The image should look exactly like a photograph of handcrafted clay figures in a miniature set, similar to Aardman's 'Wallace and Gromit' or Laika's work. Show visible fingerprint textures in the clay, slightly imperfect proportions, and the distinctive matte finish of modeling clay. Include miniature handcrafted props, visible set construction, and the characteristic charm of stop-motion."
210
+ },
211
+
212
+ "Anime": {
213
+ "description": "Japanese anime style",
214
+ "prompt": f"Create a Japanese anime-style illustration showing {protagonist} {{scene_description}}. Use the distinctive anime aesthetic with large expressive eyes, simplified facial features, and stylized colorful hair. The style should feature cel-shaded colors, speed lines for movement, and exaggerated emotional expressions. Include anime-specific visual elements like dramatic lighting effects, simplified backgrounds with depth, and the clean linework characteristic of Studio Ghibli or modern anime productions."
215
+ },
216
+
217
+ "Storybook": {
218
+ "description": "Classic children's book illustration",
219
+ "prompt": f"Create a classic children's storybook illustration showing {protagonist} {{scene_description}}. The style should resemble vintage children's books with intricate hand-drawn details, rich textures, and a warm, nostalgic quality. Use the distinctive illustration style of classic illustrators like E.H. Shepard (Winnie the Pooh) or Quentin Blake (Roald Dahl books) with detailed linework, subtle watercolor washes, and charming character designs. Include fine pen details, crosshatching, and the distinctive page-like quality of traditional book illustrations."
220
+ }
221
+ }
222
 
223
+ # Get the specific style information
224
+ style_info = style_templates.get(style, style_templates["Disney/Pixar"])
225
 
226
+ # Base prompt with the style-specific instructions
227
+ base_prompt = style_info["prompt"].format(scene_description=prompt)
228
+
229
+ # Add strong anti-text instructions
230
+ full_prompt = f"{base_prompt}\n\nCRITICAL REQUIREMENTS:\n" \
231
+ f"1. The image MUST NOT contain ANY text, words, letters, numbers, or writing of any kind.\n" \
232
+ f"2. Do not include speech bubbles, captions, labels, signs, or any other textual elements.\n" \
233
+ f"3. Focus exclusively on illustrating the visual scene without attempting to include any written language.\n" \
234
+ f"4. The illustration should communicate entirely through visual means only.\n\n" \
235
+ f"Create a complete, finished illustration with a clear foreground and background."
236
 
237
  try:
238
+ # Use our custom function that communicates directly with the API
239
+ # Use a larger image size for better quality
240
+ image_url = openai_generate_image(full_prompt, size="1024x1024", quality="hd")
241
  if image_url:
242
  return image_url
243
  else:
244
+ raise Exception("Could not generate the image")
245
  except Exception as e:
246
+ st.error("Could not generate the illustration")
247
  # Return a placeholder image URL
248
+ return "https://via.placeholder.com/512x512.png?text=Image+Not+Available"
249
 
250
  # Optional: Image generation with Replicate (Stable Diffusion)
251
  def generate_image_replicate(prompt: str, protagonist: str) -> str:
 
455
  st.title("🧐‍♂️ CuentAI – AI Story Generator")
456
  st.markdown("### Create personalized stories with AI-generated images")
457
 
458
+ # Sidebar with explanation and info moved to sidebar
459
  with st.sidebar:
460
  st.subheader("About CuentAI")
461
  st.write("""
 
471
 
472
  st.subheader("Technologies")
473
  st.write("""
474
+ - OpenAI GPT-3.5 for generating text
475
  - DALL-E 3 for creating illustrations
476
  - Streamlit for the web interface
477
  """)
478
 
479
+ # Optional TTS toggle moved to sidebar
480
+ if "story" in st.session_state:
481
+ st.markdown("---")
482
+ st.subheader("🔊 Audio Narration")
483
+
484
+ if st.checkbox("Include audio narration"):
485
+ # Check if TTS is imported
486
+ try:
487
+ from google.cloud import texttospeech
488
+ with st.spinner("Generating audio..."):
489
+ if "audio_file" not in st.session_state:
490
+ audio_file = generate_audio_tts(st.session_state.story)
491
+ st.session_state.audio_file = audio_file
492
+
493
+ if st.session_state.audio_file:
494
+ st.audio(st.session_state.audio_file)
495
+ else:
496
+ st.warning("Could not generate audio. Please check your Google Cloud configuration.")
497
+ except ImportError:
498
+ st.warning("""
499
+ The narration feature requires Google Cloud Text-to-Speech.
500
+
501
+ To enable this feature:
502
+ 1. Install the library: `pip install google-cloud-texttospeech`
503
+ 2. Configure your Google Cloud credentials
504
+ """)
505
 
506
+ # Main content area with two columns: inputs on left, display on right
507
+ main_cols = st.columns([1, 1])
508
 
509
+ # Left column: Input form
510
+ with main_cols[0]:
511
  st.subheader("Customize your story")
 
 
 
 
 
512
 
513
+ # Form inputs
514
+ with st.container():
515
+ protagonist = st.text_input("Protagonist Name", "Alice")
516
+ theme = st.text_input("Story Theme", "exploring a magical jungle")
517
+ num_scenes = st.slider("Number of scenes", min_value=1, max_value=5, value=3)
518
+
519
+ # Image generation options
520
+ st.subheader("Image Settings")
521
+
522
+ # Image generator selection
523
+ image_generator = st.radio(
524
+ "Image generation engine:",
525
+ options=["DALL-E 3", "Stable Diffusion (Replicate)"],
526
+ index=0,
527
+ horizontal=True
528
+ )
529
+ st.session_state.image_generator = image_generator
530
+
531
+ # Image style selection
532
+ image_style = st.selectbox(
533
+ "Illustration style:",
534
+ options=["Disney/Pixar", "Watercolor", "Comic Book", "Claymation", "Anime", "Storybook"],
535
+ index=0
536
+ )
537
+ st.session_state.image_style = image_style
538
+
539
+ generate_button = st.button("✨ Generate Story", use_container_width=True)
540
+
541
+ if generate_button:
542
+ with st.spinner("Writing story with AI..."):
543
+ story_text = generate_story(protagonist, theme)
544
+ st.session_state.story = story_text
545
+ st.session_state.protagonist = protagonist
546
+ st.session_state.scenes = split_into_scenes(story_text, num_scenes=num_scenes)
547
+
548
+ # Right column: Display area (sample image or generated content)
549
+ with main_cols[1]:
550
  if "story" not in st.session_state:
551
+ # Show sample image when no story has been generated
552
  st.image("https://img.freepik.com/free-vector/hand-drawn-fairy-tale-castle_23-2149423879.jpg",
553
  caption="Sample image - Generate your personalized story",
554
  use_column_width=True)
555
+ else:
556
+ # If story exists, add state to track which tab is selected
557
+ if "selected_tab" not in st.session_state:
558
+ st.session_state.selected_tab = 0
 
 
 
 
 
 
 
 
559
 
560
+ # Create tabs for scene navigation
561
+ tabs = st.tabs([f"Scene {i+1}" for i in range(len(st.session_state.scenes))])
562
+
563
+ # Process each scene in tabs
564
+ for i, (tab, scene) in enumerate(zip(tabs, st.session_state.scenes)):
565
+ with tab:
566
+ # NOTE(review): st.tabs runs every tab body on each rerun, so this ends as the last index, not the clicked tab - confirm intended behavior
567
+ st.session_state.selected_tab = i
568
+
569
  # Generate image if not already in session state
570
  if f"image_url_{i}" not in st.session_state:
571
  with st.spinner("Generating illustration..."):
572
+ # Get a meaningful summary for the image prompt
573
+ scene_summary = " ".join(scene.split()[:100]) # Limit to 100 words
574
 
575
  # Use selected image generator
576
  if st.session_state.image_generator == "DALL-E 3":
577
+ img_url = generate_image_dalle(
578
+ scene_summary,
579
+ st.session_state.protagonist,
580
+ st.session_state.image_style
581
+ )
582
  else:
583
  img_url = generate_image_replicate(scene_summary, st.session_state.protagonist)
584
 
 
586
 
587
  # Display image
588
  st.image(st.session_state[f"image_url_{i}"], use_column_width=True)
589
+ st.caption(f"Illustration for Scene {i+1} in {st.session_state.image_style} style")
590
+
591
+ # Story text display that changes based on selected tab
592
+ if "story" in st.session_state:
 
 
 
 
 
 
593
  st.markdown("---")
594
+ st.subheader("📚 Your Personalized Story")
595
 
596
+ # Only display the currently selected scene
597
+ selected_index = st.session_state.selected_tab
598
+ st.markdown(f"### Scene {selected_index + 1}")
599
+ st.markdown(f"<div class='scene-container'><div class='story-text'>{st.session_state.scenes[selected_index]}</div></div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
 
601
  # Download options
602
  st.markdown("---")
 
614
  # This is just a placeholder - in a real app you'd need to implement image downloading
615
  with col2:
616
  st.info("Image downloading will be available in a future version.")
617
+
618
+ # Full story text
619
+ with st.expander("View complete story"):
620
+ st.markdown(f"<div class='story-text'>{st.session_state.story}</div>", unsafe_allow_html=True)
621
 
622
  # Run the app
623
  if __name__ == "__main__":