Batch_image_generator

Build error

App Files Files Community

yukee1992 committed on Sep 20, 2025

Commit

b05c170

verified ·

1 Parent(s): 2111d34

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -152

app.py CHANGED Viewed

@@ -18,8 +18,6 @@ import random
 import gc
 import psutil
 import threading
-from transformers import CLIPTokenizer, CLIPTextModel
-import numpy as np
 # External OCI API URL
 OCI_API_BASE_URL = "https://yukee1992-oci-story-book.hf.space"
@@ -53,7 +51,7 @@ class StorybookRequest(BaseModel):
     model_choice: str = "sdxl"
     style: str = "childrens_book"
-# MODEL SELECTION
 MODEL_CHOICES = {
     "sdxl": "stabilityai/stable-diffusion-xl-base-1.0",
     "sdxl-turbo": "stabilityai/sdxl-turbo",
@@ -70,27 +68,11 @@ current_pipe = None
 character_descriptions = {}
 character_seeds = {}
-# CLIP tokenizer for long prompt handling
-clip_tokenizer = None
-clip_model = None
-def initialize_clip():
-    """Initialize CLIP for long prompt processing"""
-    global clip_tokenizer, clip_model
-    try:
-        clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-        clip_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
-        print("✅ CLIP model loaded for long prompt processing")
-    except Exception as e:
-        print(f"❌ CLIP loading failed: {e}")
-# Memory monitoring function
 def monitor_memory():
     try:
         process = psutil.Process()
-        memory_usage = process.memory_info().rss / 1024 / 1024
-        print(f"📊 Memory usage: {memory_usage:.2f} MB")
-        return memory_usage
     except:
         return 0
@@ -147,150 +129,93 @@ def load_model(model_name="sdxl"):
         model_cache[model_name] = pipe
         return pipe
-# Initialize CLIP and default model
 print("🚀 Initializing Storybook Generator...")
-initialize_clip()
 current_pipe = load_model("sdxl")
-print("✅ Models loaded and ready!")
-# ADVANCED LONG PROMPT HANDLING
-def segment_long_prompt(long_prompt, max_tokens=75):
     """
-    Split long prompt into meaningful segments using CLIP tokenization
-    and semantic analysis
     """
-    if clip_tokenizer is None:
-        # Fallback: simple sentence splitting
-        sentences = [s.strip() for s in long_prompt.split('.') if s.strip()]
-        return sentences
-    # Tokenize with CLIP to understand semantic boundaries
-    tokens = clip_tokenizer(long_prompt, return_tensors="pt", truncation=False)
-    token_count = tokens.input_ids.shape[1]
-    if token_count <= max_tokens:
-        return [long_prompt]
-    print(f"📝 Segmenting very long prompt: {token_count} tokens")
-    # Split into sentences first
-    sentences = [s.strip() for s in long_prompt.split('.') if s.strip()]
-    segments = []
-    current_segment = ""
-    for sentence in sentences:
-        test_segment = current_segment + ". " + sentence if current_segment else sentence
-        test_tokens = clip_tokenizer(test_segment, return_tensors="pt", truncation=False)
-        if test_tokens.input_ids.shape[1] <= max_tokens:
-            current_segment = test_segment
-        else:
-            if current_segment:
-                segments.append(current_segment)
-            current_segment = sentence
-    if current_segment:
-        segments.append(current_segment)
-    return segments
-def create_prompt_hierarchy(full_prompt):
-    """
-    Create a hierarchical prompt structure with main focus and supporting details
-    """
-    segments = segment_long_prompt(full_prompt)
-    if len(segments) == 1:
-        return full_prompt
-    # The first segment is most important (main subject/action)
-    main_prompt = segments[0]
-    # Remaining segments become supporting context with weights
-    supporting_context = ""
-    for i, segment in enumerate(segments[1:], 1):
-        weight = 1.3 - (i * 0.1)  # Decreasing weight for later segments
-        weight = max(0.8, min(1.5, weight))
-        supporting_context += f" ({segment}:{weight:.1f})"
-    final_prompt = f"{main_prompt}.{supporting_context}. masterpiece, best quality, 4K"
-    return final_prompt
-def extract_key_phrases(prompt, max_phrases=10):
-    """
-    Extract the most important phrases from very long prompts
-    """
-    # Simple heuristic: nouns, adjectives, and verbs are important
-    words = prompt.split()
-    important_words = []
-    # Prioritize words after colons, in parentheses, or quoted
-    for i, word in enumerate(words):
-        if (':' in word or '(' in word or '[' in word or
-            word.isupper() or (i > 0 and words[i-1][-1] == ':')):
-            important_words.append(word)
-    # Also take first few words of each sentence
-    sentences = prompt.split('.')
-    for sentence in sentences:
-        first_words = sentence.strip().split()[:3]
-        important_words.extend(first_words)
-    # Remove duplicates and limit
-    important_words = list(set(important_words))[:max_phrases]
-    return " ".join(important_words)
-def enhance_prompt(scene_visual, characters, style="childrens_book", page_number=1):
-    """Create comprehensive prompt with NO length limits"""
-    # Character context - include ALL details
-    character_context = ""
     if characters:
-        char_descriptions = []
         for char in characters:
             if hasattr(char, 'description'):
-                char_descriptions.append(char.description)
             elif isinstance(char, dict):
-                char_descriptions.append(char.get('description', ''))
-        character_context = " ".join(char_descriptions)
-        character_context = f"Character details: {character_context}."
-    # Scene continuity context
-    continuity_context = f"Scene {page_number}, " if page_number > 1 else ""
-    # Style templates
     style_presets = {
-        "childrens_book": "children's book illustration, watercolor style, whimsical, charming, vibrant colors, soft lighting, storybook art, detailed backgrounds, cute characters, magical atmosphere",
-        "realistic": "photorealistic, professional photography, natural lighting, detailed, sharp focus, high resolution, realistic textures, studio quality, cinematic lighting",
-        "fantasy": "fantasy art, digital painting, magical, epic, concept art, dramatic lighting, mystical, otherworldly, detailed environments, heroic",
-        "anime": "anime style, Japanese animation, clean lines, vibrant colors, cel shading, detailed eyes, dynamic poses, manga style, professional animation"
     }
     style_prompt = style_presets.get(style, style_presets["childrens_book"])
-    # Build COMPREHENSIVE prompt with ALL details
-    full_prompt = f"""
-    {continuity_context}
-    {scene_visual}.
-    {character_context}
-    Art style: {style_prompt}.
-    Technical quality: masterpiece, best quality, 4K resolution, ultra detailed,
-    professional artwork, award winning, trending on artstation, perfect composition,
-    ideal lighting, beautiful colors, no errors, perfect anatomy, consistent style
-    """
-    # Clean up the prompt
-    full_prompt = ' '.join(full_prompt.split())  # Remove extra whitespace
-    print(f"📝 Raw prompt length: {len(full_prompt.split())} words")
-    # Use hierarchical prompt creation for very long prompts
-    if len(full_prompt.split()) > 100:
-        optimized_prompt = create_prompt_hierarchy(full_prompt)
-    else:
-        optimized_prompt = full_prompt
-    print(f"📝 Final prompt length: {len(optimized_prompt.split())} words")
     # Negative prompt
     negative_prompt = (
@@ -299,11 +224,10 @@ def enhance_prompt(scene_visual, characters, style="childrens_book", page_number
         "disconnected limbs, mutation, mutated, disgusting, bad art, "
         "beginner, amateur, distorted, watermark, signature, text, username, "
         "multiple people, crowd, group, different characters, inconsistent features, "
-        "changed appearance, different face, altered features, low resolution, "
-        "jpeg artifacts, compression artifacts, noise, grain, out of focus"
     )
-    return optimized_prompt, negative_prompt
 def save_complete_storybook_page(image, story_title, sequence_number, scene_text):
     try:
@@ -361,7 +285,7 @@ def generate_storybook_page(scene_visual, story_title, sequence_number, scene_te
         )
         print(f"📖 Generating page {sequence_number}")
-        print(f"📝 Prompt preview: {enhanced_prompt[:150]}...")
         if characters:
             char_names = []
@@ -379,16 +303,17 @@ def generate_storybook_page(scene_visual, story_title, sequence_number, scene_te
             char_name = first_char.name if hasattr(first_char, 'name') else first_char.get('name', 'unknown')
             main_char_seed = get_character_seed(story_title, char_name, sequence_number)
             generator.manual_seed(main_char_seed)
         else:
             scene_seed = hash(f"{story_title}_{sequence_number}") % 1000000
             generator.manual_seed(scene_seed)
-        # Generate with SDXL which handles long prompts better
         image = current_pipe(
             prompt=enhanced_prompt,
             negative_prompt=negative_prompt,
-            num_inference_steps=40,  # More steps for better detail
-            guidance_scale=7.0,
             width=768,
             height=768,
             generator=generator

 import gc
 import psutil
 import threading
 # External OCI API URL
 OCI_API_BASE_URL = "https://yukee1992-oci-story-book.hf.space"
     model_choice: str = "sdxl"
     style: str = "childrens_book"
+# MODEL SELECTION - SDXL handles longer prompts better
 MODEL_CHOICES = {
     "sdxl": "stabilityai/stable-diffusion-xl-base-1.0",
     "sdxl-turbo": "stabilityai/sdxl-turbo",
 character_descriptions = {}
 character_seeds = {}
# Memory monitoring
def monitor_memory() -> float:
    """Return the resident set size (RSS) of this process in megabytes.

    This is a best-effort diagnostic: any ordinary failure while querying
    psutil yields 0.0 instead of raising, so callers can log the value
    unconditionally.

    Returns:
        RSS in MB (bytes / 1024 / 1024), or 0.0 if it could not be read.
    """
    try:
        return psutil.Process().memory_info().rss / 1024 / 1024
    except Exception:
        # Deliberately broad (this is only telemetry), but not a bare
        # ``except:`` so KeyboardInterrupt/SystemExit still propagate.
        return 0.0
         model_cache[model_name] = pipe
         return pipe
+# Initialize default model
 print("🚀 Initializing Storybook Generator...")
 current_pipe = load_model("sdxl")
+print("✅ Model loaded and ready!")
+# PROMPT COMPRESSION - fit scene, character, and style details within token limits
def _compress_character_description(desc):
    """Reduce a free-text character description to a few key traits.

    Looks for an age ("5 year old", "5-years-old"), the first known
    species/type word, the first known colour word, and a fixed set of
    accessories. Matching is case-insensitive and anchored on word
    boundaries so that, e.g., "that" is not read as "hat" and "scared" is
    not read as "red".

    Args:
        desc: Description text; ``None`` is treated as empty.

    Returns:
        A short string such as ``"5 year old white rabbit with hat"``.
        Falls back to ``"character"`` when no species word is found.
    """
    # Function-scope import: the enclosing file's import block is not
    # guaranteed to provide ``re``.
    import re

    text = str(desc or "")
    flags = re.IGNORECASE

    age_match = re.search(r"\b(\d+)[\s-]*years?[\s-]*old\b", text, flags)
    species_match = re.search(
        r"\b(rabbit|hedgehog|bird|dog|cat|fox|bear|dragon|unicorn|human|girl|boy)"
        r"(?:e?s)?\b",
        text,
        flags,
    )
    color_match = re.search(
        r"\b(blonde|brown|black|white|blue|red|green|yellow|golden|silver)\b",
        text,
        flags,
    )

    traits = [
        f"{age_match.group(1)} year old" if age_match else "",
        color_match.group(1) if color_match else "",
        species_match.group(1) if species_match else "character",
    ]
    # Join only the traits that were found so missing ones leave no gaps.
    summary = " ".join(trait for trait in traits if trait)

    accessory_patterns = (
        ("glasses", r"\b(?:sun|eye)?glasses\b"),
        ("dress", r"\bdress(?:es)?\b"),
        ("hat", r"\bhats?\b"),
        ("satchel", r"\bsatchels?\b"),
    )
    accessories = [
        name
        for name, pattern in accessory_patterns
        if re.search(pattern, text, flags)
    ]
    if accessories:
        summary += f" with {', '.join(accessories)}"
    return summary


def create_compressed_prompt(scene_visual, characters, style="childrens_book",
                             page_number=1, max_words=60):
    """Build a short generation prompt from scene, characters and style.

    The prompt has the shape::

        [scene N ]<scene>. Characters: <traits>, ... . Style: <style>. masterpiece, best quality, 4K

    Character descriptions are lossy-compressed to a few key traits (age,
    colour, species, selected accessories). When the prompt would exceed
    ``max_words`` whitespace-separated words, the *scene text* is shortened
    so that the character, style and quality tags at the end survive.

    Args:
        scene_visual: Free-text description of the scene.
        characters: Iterable of objects with a ``description`` attribute,
            dicts with a ``"description"`` key, or plain values (converted
            with ``str``). May be empty or ``None``.
        style: Key into the built-in style presets; unknown keys fall back
            to ``"childrens_book"``.
        page_number: Page index; values above 1 add a ``"scene N"`` prefix.
        max_words: Positive upper bound on the number of words in the result.

    Returns:
        The assembled prompt string.
    """
    character_features = []
    for char in characters or []:
        if hasattr(char, 'description'):
            desc = char.description
        elif isinstance(char, dict):
            desc = char.get('description', '')
        else:
            desc = str(char)
        character_features.append(_compress_character_description(desc))

    continuity_context = f"scene {page_number}" if page_number > 1 else ""

    # Style templates (compressed)
    style_presets = {
        "childrens_book": "children's book illustration, watercolor, whimsical",
        "realistic": "photorealistic, professional photography",
        "fantasy": "fantasy art, digital painting, magical",
        "anime": "anime style, clean lines, vibrant colors"
    }
    style_prompt = style_presets.get(style, style_presets["childrens_book"])

    # Drop a trailing period so joining with ". " does not produce "..".
    scene_text = str(scene_visual or "").strip().rstrip(".").rstrip()
    head = " ".join(part for part in (continuity_context, scene_text) if part)

    tail_segments = []
    if character_features:
        tail_segments.append(f"Characters: {', '.join(character_features)}")
    tail_segments.append(f"Style: {style_prompt}")
    tail_segments.append("masterpiece, best quality, 4K")

    def assemble(head_text):
        return ". ".join(([head_text] if head_text else []) + tail_segments)

    prompt = assemble(head)
    if len(prompt.split()) > max_words:
        # Spend whatever the tail leaves over on the scene text; cutting the
        # end of the prompt instead would drop the style/quality tags.
        budget = max_words - len(". ".join(tail_segments).split())
        if budget > 0:
            trimmed_head = " ".join(head.split()[:budget]).rstrip(" .,;:")
            prompt = assemble(trimmed_head)
        else:
            # The tail alone is over budget (e.g. very many characters):
            # fall back to a plain hard cut.
            prompt = " ".join(prompt.split()[:max_words])
    return prompt
+def enhance_prompt(scene_visual, characters, style="childrens_book", page_number=1):
+    """
+    Create optimized prompt that preserves essence while fitting token limits
+    """
+    # Use compressed prompt for the actual generation
+    main_prompt = create_compressed_prompt(scene_visual, characters, style, page_number)
+    print(f"📝 Compressed prompt: {main_prompt}")
+    print(f"📏 Length: {len(main_prompt.split())} words")
     # Negative prompt
     negative_prompt = (
         "disconnected limbs, mutation, mutated, disgusting, bad art, "
         "beginner, amateur, distorted, watermark, signature, text, username, "
         "multiple people, crowd, group, different characters, inconsistent features, "
+        "changed appearance, different face, altered features, low resolution"
     )
+    return main_prompt, negative_prompt
 def save_complete_storybook_page(image, story_title, sequence_number, scene_text):
     try:
         )
         print(f"📖 Generating page {sequence_number}")
+        print(f"📝 Using prompt: {enhanced_prompt}")
         if characters:
             char_names = []
             char_name = first_char.name if hasattr(first_char, 'name') else first_char.get('name', 'unknown')
             main_char_seed = get_character_seed(story_title, char_name, sequence_number)
             generator.manual_seed(main_char_seed)
+            print(f"🌱 Using seed {main_char_seed} for {char_name}")
         else:
             scene_seed = hash(f"{story_title}_{sequence_number}") % 1000000
             generator.manual_seed(scene_seed)
+        # Generate image
         image = current_pipe(
             prompt=enhanced_prompt,
             negative_prompt=negative_prompt,
+            num_inference_steps=35,
+            guidance_scale=7.5,
             width=768,
             height=768,
             generator=generator