Image_generator

Sleeping

App Files Files Community

yukee1992 committed on Sep 20, 2025

Commit

c0034ac

verified ·

1 Parent(s): 6bf397f

Update app.py

Browse files

Files changed (1) hide show

app.py +188 -260

app.py CHANGED Viewed

@@ -1,24 +1,20 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler
 from PIL import Image
 import io
 import requests
 import os
 from datetime import datetime
 import re
-import tempfile
 import time
-import base64
-import json
-from typing import Dict, List, Tuple, Optional
 from fastapi import FastAPI, HTTPException, BackgroundTasks
 from pydantic import BaseModel
-import random
 import gc
 import psutil
 import threading
-from functools import lru_cache
 # External OCI API URL
 OCI_API_BASE_URL = "https://yukee1992-oci-story-book.hf.space"
@@ -49,28 +45,38 @@ class StorybookRequest(BaseModel):
     story_title: str
     scenes: List[StoryScene]
     characters: List[CharacterDescription] = []
-    model_choice: str = "sdxl"
     style: str = "childrens_book"
 # MODEL SELECTION
 MODEL_CHOICES = {
-    "sdxl": "stabilityai/stable-diffusion-xl-base-1.0",
-    "sdxl-turbo": "stabilityai/sdxl-turbo",
     "dreamshaper-8": "lykon/dreamshaper-8",
     "realistic-vision": "SG161222/Realistic_Vision_V5.1",
 }
-# GLOBAL MODEL CACHE with proper locking
 model_cache = {}
 current_model_name = None
 current_pipe = None
 model_lock = threading.Lock()
 # Character consistency tracking
-character_descriptions = {}
 character_seeds = {}
-# Memory monitoring
 def monitor_memory():
     try:
         process = psutil.Process()
@@ -83,138 +89,112 @@ def cleanup_memory():
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
-def load_model(model_name="sdxl"):
-    """Thread-safe model loading with proper caching"""
     global model_cache, current_model_name, current_pipe
     with model_lock:
         if model_name in model_cache:
-            print(f"✅ Using cached model: {model_name}")
             current_pipe = model_cache[model_name]
             current_model_name = model_name
             return current_pipe
         print(f"🔄 Loading model: {model_name}")
         try:
-            if model_name in ["sdxl", "sdxl-turbo"]:
-                model_id = MODEL_CHOICES[model_name]
-                pipe = StableDiffusionXLPipeline.from_pretrained(
-                    model_id,
-                    torch_dtype=torch.float32,
-                    use_safetensors=True,
-                    safety_checker=None,
-                    requires_safety_checker=False
-                )
-                pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-            else:
-                model_id = MODEL_CHOICES.get(model_name, "lykon/dreamshaper-8")
-                pipe = StableDiffusionPipeline.from_pretrained(
-                    model_id,
-                    torch_dtype=torch.float32,
-                    safety_checker=None,
-                    requires_safety_checker=False
-                )
-                pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
             pipe = pipe.to("cpu")
             model_cache[model_name] = pipe
             current_pipe = pipe
             current_model_name = model_name
-            print(f"✅ Model loaded and cached: {model_name}")
             return pipe
         except Exception as e:
             print(f"❌ Model loading failed: {e}")
-            pipe = StableDiffusionPipeline.from_pretrained(
-                "runwayml/stable-diffusion-v1-5",
-                torch_dtype=torch.float32
-            ).to("cpu")
-            model_cache[model_name] = pipe
-            return pipe
 # Initialize default model
 print("🚀 Initializing Storybook Generator...")
-current_pipe = load_model("sdxl")
 print("✅ Model loaded and ready!")
-# OPTIMIZED PROMPT COMPRESSION
-@lru_cache(maxsize=100)
-def compress_prompt(text, style="childrens_book"):
-    """Cache compressed prompts to avoid recomputation"""
-    # Simple compression: remove redundant words and shorten
-    words = text.split()
-    if len(words) <= 50:
-        return text
-    # Keep first 40 words (most important part) and key descriptors
-    compressed = ' '.join(words[:40])
-    # Add style context
-    style_context = {
-        "childrens_book": "children's book style",
-        "realistic": "realistic style",
-        "fantasy": "fantasy style",
-        "anime": "anime style"
-    }
-    return f"{compressed}... {style_context.get(style, '')} masterpiece 4K"
-def create_optimized_prompt(scene_visual, characters, style="childrens_book", page_number=1):
-    """Create optimized prompt within token limits"""
-    # Compress the scene visual
-    scene_compressed = compress_prompt(scene_visual, style)
-    # Extract character essentials
-    char_descriptors = []
     if characters:
         for char in characters:
-            if hasattr(char, 'name'):
-                name = char.name
-                desc = char.description
-            elif isinstance(char, dict):
-                name = char.get('name', 'Unknown')
-                desc = char.get('description', '')
-            else:
-                continue
-            # Extract key features
             import re
-            # Get species/type
             species_match = re.search(r'(rabbit|hedgehog|bird|dog|cat|fox|bear|dragon|human|girl|boy)', desc, re.IGNORECASE)
             species = species_match.group(1) if species_match else "character"
-            # Get color
             color_match = re.search(r'(white|black|brown|blue|red|green|yellow|golden|pink)', desc, re.IGNORECASE)
             color = color_match.group(1) if color_match else ""
-            char_descriptors.append(f"{color} {species}".strip())
-    # Build the final prompt
-    continuity = f"scene {page_number} " if page_number > 1 else ""
-    chars_text = f"Characters: {', '.join(char_descriptors)}. " if char_descriptors else ""
-    final_prompt = f"{continuity}{scene_compressed}. {chars_text}masterpiece best quality 4K"
     # Ensure it's under 60 words
     words = final_prompt.split()
     if len(words) > 60:
         final_prompt = ' '.join(words[:60])
     return final_prompt
 def enhance_prompt(scene_visual, characters, style="childrens_book", page_number=1):
-    """Create optimized prompt"""
-    main_prompt = create_optimized_prompt(scene_visual, characters, style, page_number)
-    print(f"📝 Optimized prompt: {main_prompt}")
-    print(f"📏 Length: {len(main_prompt.split())} words")
-    # Negative prompt
     negative_prompt = (
         "blurry, low quality, ugly, deformed, bad anatomy, "
-        "watermark, signature, text, username, multiple people, "
-        "inconsistent features, low resolution"
     )
     return main_prompt, negative_prompt
@@ -263,169 +243,143 @@ def get_character_seed(story_title, character_name, page_number):
     return character_seeds[story_title][seed_key]
-def generate_storybook_page(scene_visual, story_title, sequence_number, scene_text, characters, model_choice="sdxl", style="childrens_book"):
-    """Generate a single page - OPTIMIZED VERSION"""
-    global current_pipe, current_model_name
     try:
-        # ONLY load model if different from current
-        if model_choice != current_model_name:
-            print(f"🔄 Switching to model: {model_choice}")
-            current_pipe = load_model(model_choice)
-        else:
-            print(f"✅ Using already loaded model: {model_choice}")
         enhanced_prompt, negative_prompt = enhance_prompt(
             scene_visual, characters, style, sequence_number
         )
-        print(f"📖 Generating page {sequence_number}")
-        if characters:
-            char_names = []
-            for char in characters:
-                if hasattr(char, 'name'):
-                    char_names.append(char.name)
-                elif isinstance(char, dict):
-                    char_names.append(char.get('name', 'unknown'))
-            print(f"👤 Characters: {char_names}")
-        generator = torch.Generator(device="cpu")
         if characters:
             first_char = characters[0]
-            char_name = first_char.name if hasattr(first_char, 'name') else first_char.get('name', 'unknown')
-            main_char_seed = get_character_seed(story_title, char_name, sequence_number)
-            generator.manual_seed(main_char_seed)
-            print(f"🌱 Using seed {main_char_seed} for {char_name}")
-        else:
-            scene_seed = hash(f"{story_title}_{sequence_number}") % 1000000
-            generator.manual_seed(scene_seed)
-        # Generate image with optimized parameters
-        print("⏳ Starting image generation...")
-        start_time = time.time()
         image = current_pipe(
             prompt=enhanced_prompt,
             negative_prompt=negative_prompt,
-            num_inference_steps=25,  # Reduced from 35 for speed
             guidance_scale=7.0,
-            width=512,  # Reduced from 768 for speed
             height=512,
             generator=generator
         ).images[0]
-        gen_time = time.time() - start_time
-        print(f"✅ Image generated in {gen_time:.1f} seconds")
         save_status = save_complete_storybook_page(image, story_title, sequence_number, scene_text)
-        return image, save_status
     except Exception as e:
-        return None, f"❌ Generation failed: {str(e)}"
-def batch_generate_complete_storybook(story_title, scenes_data, characters, model_choice="sdxl", style="childrens_book"):
-    """Batch generation with significant optimizations"""
-    global current_pipe
-    results = []
-    status_messages = []
-    print(f"📚 Starting OPTIMIZED batch generation: {story_title}")
-    print(f"📖 Pages: {len(scenes_data)}")
-    print(f"👤 Characters: {len(characters)}")
-    # Load model ONCE at the beginning
-    print(f"🔧 Loading model once for entire batch...")
-    current_pipe = load_model(model_choice)
-    batch_start_time = time.time()
-    for i, scene_data in enumerate(scenes_data, 1):
-        try:
-            scene_visual = scene_data.get('visual', '')
-            scene_text = scene_data.get('text', '')
-            print(f"🔄 Generating page {i}/{len(scenes_data)}...")
-            page_start_time = time.time()
-            image, status = generate_storybook_page(
-                scene_visual, story_title, i, scene_text, characters, model_choice, style
-            )
-            page_time = time.time() - page_start_time
-            print(f"⏰ Page {i} completed in {page_time:.1f} seconds")
-            if image:
-                results.append((f"Page {i}", image, scene_text))
-            status_messages.append(f"Page {i}: {status}")
-            # Clean memory every 3 pages
-            if i % 3 == 0:
-                cleanup_memory()
-        except Exception as e:
-            error_msg = f"❌ Failed page {i}: {str(e)}"
-            print(error_msg)
-            status_messages.append(error_msg)
-    total_time = time.time() - batch_start_time
-    print(f"✅ Batch completed in {total_time:.2f} seconds")
-    print(f"📊 Average: {total_time/len(scenes_data):.1f} seconds per page")
-    return results, "\n".join(status_messages)
-# FastAPI endpoint
-@app.post("/api/generate-storybook")
 async def api_generate_storybook(request: StorybookRequest):
     try:
         print(f"📚 Received request: {request.story_title}")
         print(f"📖 Pages: {len(request.scenes)}")
         start_time = time.time()
-        scenes_data = [{"visual": scene.visual, "text": scene.text} for scene in request.scenes]
-        # Convert characters to dict ONCE
         characters_dict = []
         for char in request.characters:
-            if hasattr(char, 'dict'):
-                characters_dict.append(char.dict())
-            else:
-                characters_dict.append({"name": getattr(char, 'name', 'Unknown'),
-                                      "description": getattr(char, 'description', '')})
-        results, status = batch_generate_complete_storybook(
-            request.story_title,
-            scenes_data,
-            characters_dict,
-            request.model_choice,
-            request.style
-        )
-        generation_time = time.time() - start_time
-        return {
-            "status": "success",
-            "story_title": request.story_title,
-            "total_pages": len(request.scenes),
-            "characters_used": len(request.characters),
-            "generated_pages": len(results),
-            "generation_time": round(generation_time, 2),
-            "message": status,
-            "folder_path": f"storybook-library/stories/{request.story_title.replace(' ', '_')}/",
-            "pages": [
-                {
-                    "page_number": i+1,
-                    "image_file": f"page_{i+1:03d}_{request.story_title.replace(' ', '_')}.png",
-                    "text_file": f"page_{i+1:03d}_{request.story_title.replace(' ', '_')}.txt"
-                } for i in range(len(request.scenes))
-            ]
-        }
     except Exception as e:
-        error_msg = f"Storybook generation failed: {str(e)}"
-        print(f"❌ {error_msg}")
-        raise HTTPException(status_code=500, detail=error_msg)
 @app.get("/api/health")
 async def health_check():
@@ -438,49 +392,23 @@ async def health_check():
         "current_model": current_model_name
     }
-# Gradio Interface
-def generate_single_page(prompt, story_title, scene_text, model_choice, style):
-    if not prompt or not story_title:
-        return None, "❌ Please enter both scene description and story title"
-    global current_pipe
-    if current_model_name != model_choice:
-        current_pipe = load_model(model_choice)
-    image, status = generate_storybook_page(
-        prompt, story_title, 1, scene_text or "", [], model_choice, style
-    )
-    return image, status
 with gr.Blocks(title="Storybook Generator", theme="soft") as demo:
     gr.Markdown("# 📚 Storybook Generator")
-    gr.Markdown("Create beautiful storybooks with consistent characters")
     with gr.Row():
-        with gr.Column(scale=1):
-            story_title_input = gr.Textbox(label="Story Title", lines=1)
-            model_choice = gr.Dropdown(
-                label="AI Model",
-                choices=list(MODEL_CHOICES.keys()),
-                value="sdxl"
-            )
-            style_choice = gr.Dropdown(
-                label="Art Style",
-                choices=["childrens_book", "realistic", "fantasy", "anime"],
-                value="childrens_book"
-            )
-        with gr.Column(scale=2):
-            prompt_input = gr.Textbox(label="Visual Description", lines=5)
-            text_input = gr.Textbox(label="Story Text (Optional)", lines=2)
-            generate_btn = gr.Button("✨ Generate Single Page", variant="primary")
-            image_output = gr.Image(label="Generated Page", height=400)
-            status_output = gr.Textbox(label="Status", interactive=False)
     generate_btn.click(
-        fn=generate_single_page,
-        inputs=[prompt_input, story_title_input, text_input, model_choice, style_choice],
-        outputs=[image_output, status_output]
     )
 app = gr.mount_gradio_app(app, demo, path="/")

 import gradio as gr
 import torch
+from diffusers import StableDiffusionPipeline, EulerAncestralDiscreteScheduler
 from PIL import Image
 import io
 import requests
 import os
 from datetime import datetime
 import re
 import time
+from typing import List, Optional
 from fastapi import FastAPI, HTTPException, BackgroundTasks
 from pydantic import BaseModel
 import gc
 import psutil
 import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
 # External OCI API URL
 OCI_API_BASE_URL = "https://yukee1992-oci-story-book.hf.space"
     story_title: str
     scenes: List[StoryScene]
     characters: List[CharacterDescription] = []
+    model_choice: str = "dreamshaper-8"
     style: str = "childrens_book"
class StorybookResponse(BaseModel):
    """Response schema declared for the ``/api/generate-storybook`` endpoint."""

    status: str
    story_title: str
    total_pages: int
    characters_used: int
    generated_pages: int
    generation_time: float
    message: str
    folder_path: str
    pages: List[dict]
 # MODEL SELECTION
 MODEL_CHOICES = {
     "dreamshaper-8": "lykon/dreamshaper-8",
     "realistic-vision": "SG161222/Realistic_Vision_V5.1",
 }
+# GLOBAL MODEL CACHE
 model_cache = {}
 current_model_name = None
 current_pipe = None
 model_lock = threading.Lock()
 # Character consistency tracking
 character_seeds = {}
+# Thread pool for parallel processing
+executor = ThreadPoolExecutor(max_workers=2)
 def monitor_memory():
     try:
         process = psutil.Process()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
def load_model(model_name="dreamshaper-8"):
    """Return the diffusion pipeline for ``model_name``, loading it on first use.

    Loaded pipelines are kept in ``model_cache``; the selected pipeline is
    also published through the ``current_pipe`` / ``current_model_name``
    globals. Loading and selection happen under ``model_lock``.

    Args:
        model_name: Key into ``MODEL_CHOICES``. Unknown keys fall back to
            ``"dreamshaper-8"``.

    Returns:
        The pipeline, moved to the CPU.

    Raises:
        Exception: Whatever the underlying load raised; it is logged and
            re-raised unchanged.
    """
    global model_cache, current_model_name, current_pipe

    # Normalise unknown names BEFORE the cache lookup. Otherwise every
    # distinct unknown name would load (and cache) another full copy of the
    # fallback model under its own bogus key.
    if model_name not in MODEL_CHOICES:
        print(f"⚠️ Unknown model '{model_name}', falling back to dreamshaper-8")
        model_name = "dreamshaper-8"

    with model_lock:
        cached = model_cache.get(model_name)
        if cached is not None:
            current_pipe = cached
            current_model_name = model_name
            return cached

        print(f"🔄 Loading model: {model_name}")
        try:
            pipe = StableDiffusionPipeline.from_pretrained(
                MODEL_CHOICES[model_name],
                torch_dtype=torch.float32,
                safety_checker=None,
                requires_safety_checker=False,
            )
            pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
            pipe = pipe.to("cpu")
        except Exception as e:
            print(f"❌ Model loading failed: {e}")
            raise

        # Publish only after the pipeline is fully constructed.
        model_cache[model_name] = pipe
        current_pipe = pipe
        current_model_name = model_name
        print(f"✅ Model loaded: {model_name}")
        return pipe
 # Initialize default model
 print("🚀 Initializing Storybook Generator...")
+load_model("dreamshaper-8")
 print("✅ Model loaded and ready!")
+# CRITICAL: PROMPT OPTIMIZATION THAT ACTUALLY WORKS
def optimize_prompt(scene_visual, characters, style="childrens_book", page_number=1):
    """Build a compact image prompt for one storybook page.

    The prompt is assembled as
    ``[Scene N: ]<scene>. [Features: <characters>. ]<style>. masterpiece, best quality``
    and capped at 60 whitespace-separated words. The cap is a word-count
    heuristic; it does not measure tokenizer tokens.

    Args:
        scene_visual: Free-text scene description; only its first 40 words
            are used. ``None`` or an empty string omits the scene segment.
        characters: Iterable of dicts or objects exposing ``description``.
            Each is reduced to "<color> <species> <key feature>".
        style: One of ``childrens_book``, ``realistic``, ``fantasy``,
            ``anime``; anything else uses the children's-book wording.
        page_number: Pages after the first get a ``Scene N:`` prefix.

    Returns:
        The final prompt string.
    """
    import re  # kept function-scope as in the original, but hoisted out of the loop

    max_words = 60
    max_scene_words = 40

    # Word boundaries prevent substring false positives such as
    # "tired" -> red, "that" -> hat, "beard" -> bear, "address" -> dress.
    species_re = re.compile(
        r'\b(rabbit|hedgehog|bird|dog|cat|fox|bear|dragon|human|girl|boy)(?:e?s)?\b',
        re.IGNORECASE,
    )
    color_re = re.compile(
        r'\b(white|black|brown|blue|red|green|yellow|golden|pink)\b',
        re.IGNORECASE,
    )
    # Checked in order; the first matching feature wins.
    feature_phrases = (
        (re.compile(r'\bglasses\b', re.IGNORECASE), "with glasses"),
        (re.compile(r'\bdress(?:es)?\b', re.IGNORECASE), "in dress"),
        (re.compile(r'\bhats?\b', re.IGNORECASE), "with hat"),
    )

    # Extract ONLY: species + color + 1 key feature per character
    character_essence = ""
    if characters:
        char_descriptors = []
        for char in characters:
            if isinstance(char, dict):
                desc = char.get('description', '')
            else:
                desc = getattr(char, 'description', '')
            desc = desc or ""  # tolerate an explicit None description

            species_match = species_re.search(desc)
            species = species_match.group(1) if species_match else "character"
            color_match = color_re.search(desc)
            color = color_match.group(1) if color_match else ""
            key_feature = next(
                (phrase for pattern, phrase in feature_phrases if pattern.search(desc)),
                "",
            )
            char_descriptors.append(f"{color} {species} {key_feature}".strip())
        character_essence = f"Features: {', '.join(char_descriptors)}. "

    # Compress scene description to MAX 40 words
    scene_visual = scene_visual or ""
    scene_words = scene_visual.split()
    if len(scene_words) > max_scene_words:
        scene_compressed = ' '.join(scene_words[:max_scene_words])
    else:
        scene_compressed = scene_visual
    # Skip the segment entirely for an empty scene so the prompt does not
    # start with a stray ". ".
    scene_part = f"{scene_compressed}. " if scene_compressed.strip() else ""

    # Style context (very brief)
    style_context = {
        "childrens_book": "children's book illustration",
        "realistic": "photorealistic",
        "fantasy": "fantasy art",
        "anime": "anime style"
    }.get(style, "children's book illustration")

    continuity = f"Scene {page_number}: " if page_number > 1 else ""
    tail = f"{style_context}. masterpiece, best quality"
    final_prompt = f"{continuity}{scene_part}{character_essence}{tail}"

    # Enforce the word cap by trimming the END OF THE HEAD (scene/character
    # text), never the style and quality tags at the tail.
    words = final_prompt.split()
    if len(words) > max_words:
        tail_words = tail.split()
        head_words = words[:len(words) - len(tail_words)]
        final_prompt = ' '.join(head_words[:max_words - len(tail_words)] + tail_words)

    print(f"📝 Optimized prompt: {final_prompt}")
    print(f"📏 Length: {len(final_prompt.split())} words")
    return final_prompt
 def enhance_prompt(scene_visual, characters, style="childrens_book", page_number=1):
+    """Create optimized prompt that WILL work"""
+    main_prompt = optimize_prompt(scene_visual, characters, style, page_number)
     negative_prompt = (
         "blurry, low quality, ugly, deformed, bad anatomy, "
+        "watermark, text, username, multiple people, inconsistent"
     )
     return main_prompt, negative_prompt
     return character_seeds[story_title][seed_key]
def generate_single_page(scene_data, story_title, sequence_number, characters, model_choice, style):
    """Generate and save one storybook page, reporting failure as data.

    Args:
        scene_data: Dict with optional ``visual`` (image description) and
            ``text`` (page text) keys.
        story_title: Story title, used for seeding and for saving the page.
        sequence_number: 1-based page number.
        characters: List of dicts or objects with ``name``/``description``;
            the first character's name selects the seed.
        model_choice: Model key handed to ``load_model``.
        style: Style key forwarded to ``enhance_prompt``.

    Returns:
        ``{"success": True, "page_number", "image", "status"}`` on success,
        or ``{"success": False, "page_number", "error"}`` if anything raised.
    """
    try:
        scene_visual = scene_data.get('visual', '')
        scene_text = scene_data.get('text', '')

        print(f"🔄 Generating page {sequence_number}...")

        enhanced_prompt, negative_prompt = enhance_prompt(
            scene_visual, characters, style, sequence_number
        )

        # Get character name for seed
        main_char_name = "default"
        if characters:
            first_char = characters[0]
            if isinstance(first_char, dict):
                main_char_name = first_char.get('name', 'default')
            else:
                main_char_name = getattr(first_char, 'name', 'default')

        # Use consistent seed
        generator = torch.Generator(device="cpu")
        main_char_seed = get_character_seed(story_title, main_char_name, sequence_number)
        generator.manual_seed(main_char_seed)

        # Resolve the pipeline from model_choice instead of reading the
        # mutable global: the argument was previously ignored, so a page could
        # be rendered with whatever model happened to be selected last.
        # load_model returns the cached pipeline when it is already loaded.
        pipe = load_model(model_choice)

        image = pipe(
            prompt=enhanced_prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=20,  # Faster generation
            guidance_scale=7.0,
            width=512,  # Smaller for speed
            height=512,
            generator=generator
        ).images[0]

        save_status = save_complete_storybook_page(image, story_title, sequence_number, scene_text)

        return {
            "success": True,
            "page_number": sequence_number,
            "image": image,
            "status": save_status
        }

    except Exception as e:
        # Deliberate best-effort boundary: callers branch on "success"
        # rather than handling exceptions per page.
        return {
            "success": False,
            "page_number": sequence_number,
            "error": f"Generation failed: {str(e)}"
        }
+# FastAPI endpoint - OPTIMIZED
@app.post("/api/generate-storybook", response_model=StorybookResponse)
async def api_generate_storybook(request: StorybookRequest, background_tasks: BackgroundTasks):
    """Queue storybook generation and answer immediately.

    Args:
        request: Validated storybook request body.
        background_tasks: Injected by FastAPI. Tasks added to this instance
            run after the response is sent. A ``BackgroundTasks()`` created
            by hand inside the handler is never executed, which is why it
            must be a parameter.

    Returns:
        A ``processing`` placeholder response; page results are not included
        because generation has not run yet.

    Raises:
        HTTPException: 500 if building the response or queueing fails.
    """
    try:
        print(f"📚 Received request: {request.story_title}")
        print(f"📖 Pages: {len(request.scenes)}")

        # IMMEDIATE response to n8n to prevent timeout
        response_data = {
            "status": "processing",
            "story_title": request.story_title,
            "total_pages": len(request.scenes),
            "characters_used": len(request.characters),
            "generated_pages": 0,
            "generation_time": 0,
            "message": "Processing started in background",
            "folder_path": f"storybook-library/stories/{request.story_title.replace(' ', '_')}/",
            "pages": []
        }

        # Start background processing on the framework-managed task list
        background_tasks.add_task(process_storybook_background, request)

        return response_data

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Request failed: {str(e)}") from e
def process_storybook_background(request):
    """Generate every page of a storybook request, one page at a time.

    Intended to run as a background task: it returns nothing and reports
    progress and failures through ``print`` only. A failing page is logged
    and skipped; it does not stop the remaining pages.

    Args:
        request: Object exposing ``model_choice``, ``characters`` (items with
            ``name``/``description``), ``scenes`` (items with
            ``visual``/``text``), ``story_title`` and ``style``.
    """
    try:
        start_time = time.time()

        # Load model ONCE
        load_model(request.model_choice)

        # Convert characters to plain dicts
        characters_dict = [
            {"name": char.name, "description": char.description}
            for char in request.characters
        ]

        total_pages = len(request.scenes)
        completed = 0

        # Process each page SEQUENTIALLY (better for memory)
        for i, scene in enumerate(request.scenes, 1):
            try:
                result = generate_single_page(
                    {"visual": scene.visual, "text": scene.text},
                    request.story_title,
                    i,
                    characters_dict,
                    request.model_choice,
                    request.style
                )

                if result["success"]:
                    completed += 1
                    print(f"✅ Page {i} completed successfully")
                else:
                    print(f"❌ Page {i} failed: {result['error']}")

                # Drop the only reference to the generated image before
                # cleaning up. Keeping every result in a list would hold all
                # images in memory for the whole run.
                result = None
                cleanup_memory()

                # Add small delay to prevent resource exhaustion
                if i < total_pages:
                    time.sleep(2)

            except Exception as e:
                error_msg = f"Page {i} failed: {str(e)}"
                print(f"❌ {error_msg}")

        total_time = time.time() - start_time
        print(f"📊 Pages generated: {completed}/{total_pages}")
        print(f"✅ Background processing completed in {total_time:.2f} seconds")

    except Exception as e:
        print(f"❌ Background processing failed: {str(e)}")
 @app.get("/api/health")
 async def health_check():
         "current_model": current_model_name
     }
# Simple Gradio interface
def _generate_page_for_ui(prompt, title):
    """Adapt ``generate_single_page`` to the UI's ``(image, status)`` outputs.

    ``generate_single_page`` returns a result dict, but the click handler is
    wired to two output components and must return one value per component.
    """
    if not prompt or not title:
        return None, "❌ Please enter both scene description and story title"

    result = generate_single_page(
        {"visual": prompt, "text": ""}, title, 1, [], "dreamshaper-8", "childrens_book"
    )
    if result["success"]:
        return result["image"], str(result["status"])
    return None, result["error"]


with gr.Blocks(title="Storybook Generator", theme="soft") as demo:
    gr.Markdown("# 📚 Storybook Generator")

    with gr.Row():
        story_title = gr.Textbox(label="Story Title")
        prompt_input = gr.Textbox(label="Scene Description", lines=3)
        generate_btn = gr.Button("Generate")
        output_image = gr.Image()
        status = gr.Textbox()

    generate_btn.click(
        fn=_generate_page_for_ui,
        inputs=[prompt_input, story_title],
        outputs=[output_image, status]
    )

# Serve the Gradio UI from the FastAPI app's root path.
app = gr.mount_gradio_app(app, demo, path="/")