Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,8 +18,6 @@ import random
|
|
| 18 |
import gc
|
| 19 |
import psutil
|
| 20 |
import threading
|
| 21 |
-
from transformers import CLIPTokenizer, CLIPTextModel
|
| 22 |
-
import numpy as np
|
| 23 |
|
| 24 |
# External OCI API URL
|
| 25 |
OCI_API_BASE_URL = "https://yukee1992-oci-story-book.hf.space"
|
|
@@ -53,7 +51,7 @@ class StorybookRequest(BaseModel):
|
|
| 53 |
model_choice: str = "sdxl"
|
| 54 |
style: str = "childrens_book"
|
| 55 |
|
| 56 |
-
# MODEL SELECTION
|
| 57 |
MODEL_CHOICES = {
|
| 58 |
"sdxl": "stabilityai/stable-diffusion-xl-base-1.0",
|
| 59 |
"sdxl-turbo": "stabilityai/sdxl-turbo",
|
|
@@ -70,27 +68,11 @@ current_pipe = None
|
|
| 70 |
character_descriptions = {}
|
| 71 |
character_seeds = {}
|
| 72 |
|
| 73 |
-
#
|
| 74 |
-
clip_tokenizer = None
|
| 75 |
-
clip_model = None
|
| 76 |
-
|
| 77 |
-
def initialize_clip():
|
| 78 |
-
"""Initialize CLIP for long prompt processing"""
|
| 79 |
-
global clip_tokenizer, clip_model
|
| 80 |
-
try:
|
| 81 |
-
clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
|
| 82 |
-
clip_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
|
| 83 |
-
print("✅ CLIP model loaded for long prompt processing")
|
| 84 |
-
except Exception as e:
|
| 85 |
-
print(f"β CLIP loading failed: {e}")
|
| 86 |
-
|
| 87 |
-
# Memory monitoring function
|
| 88 |
def monitor_memory():
|
| 89 |
try:
|
| 90 |
process = psutil.Process()
|
| 91 |
-
|
| 92 |
-
print(f"π Memory usage: {memory_usage:.2f} MB")
|
| 93 |
-
return memory_usage
|
| 94 |
except:
|
| 95 |
return 0
|
| 96 |
|
|
@@ -147,150 +129,93 @@ def load_model(model_name="sdxl"):
|
|
| 147 |
model_cache[model_name] = pipe
|
| 148 |
return pipe
|
| 149 |
|
| 150 |
-
# Initialize
|
| 151 |
print("π Initializing Storybook Generator...")
|
| 152 |
-
initialize_clip()
|
| 153 |
current_pipe = load_model("sdxl")
|
| 154 |
-
print("β
|
| 155 |
|
| 156 |
-
#
|
| 157 |
-
def
|
| 158 |
"""
|
| 159 |
-
|
| 160 |
-
|
| 161 |
"""
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
sentences = [s.strip() for s in long_prompt.split('.') if s.strip()]
|
| 165 |
-
return sentences
|
| 166 |
-
|
| 167 |
-
# Tokenize with CLIP to understand semantic boundaries
|
| 168 |
-
tokens = clip_tokenizer(long_prompt, return_tensors="pt", truncation=False)
|
| 169 |
-
token_count = tokens.input_ids.shape[1]
|
| 170 |
-
|
| 171 |
-
if token_count <= max_tokens:
|
| 172 |
-
return [long_prompt]
|
| 173 |
-
|
| 174 |
-
print(f"π Segmenting very long prompt: {token_count} tokens")
|
| 175 |
-
|
| 176 |
-
# Split into sentences first
|
| 177 |
-
sentences = [s.strip() for s in long_prompt.split('.') if s.strip()]
|
| 178 |
-
segments = []
|
| 179 |
-
current_segment = ""
|
| 180 |
-
|
| 181 |
-
for sentence in sentences:
|
| 182 |
-
test_segment = current_segment + ". " + sentence if current_segment else sentence
|
| 183 |
-
test_tokens = clip_tokenizer(test_segment, return_tensors="pt", truncation=False)
|
| 184 |
-
|
| 185 |
-
if test_tokens.input_ids.shape[1] <= max_tokens:
|
| 186 |
-
current_segment = test_segment
|
| 187 |
-
else:
|
| 188 |
-
if current_segment:
|
| 189 |
-
segments.append(current_segment)
|
| 190 |
-
current_segment = sentence
|
| 191 |
-
|
| 192 |
-
if current_segment:
|
| 193 |
-
segments.append(current_segment)
|
| 194 |
-
|
| 195 |
-
return segments
|
| 196 |
-
|
| 197 |
-
def create_prompt_hierarchy(full_prompt):
|
| 198 |
-
"""
|
| 199 |
-
Create a hierarchical prompt structure with main focus and supporting details
|
| 200 |
-
"""
|
| 201 |
-
segments = segment_long_prompt(full_prompt)
|
| 202 |
-
|
| 203 |
-
if len(segments) == 1:
|
| 204 |
-
return full_prompt
|
| 205 |
-
|
| 206 |
-
# The first segment is most important (main subject/action)
|
| 207 |
-
main_prompt = segments[0]
|
| 208 |
-
|
| 209 |
-
# Remaining segments become supporting context with weights
|
| 210 |
-
supporting_context = ""
|
| 211 |
-
for i, segment in enumerate(segments[1:], 1):
|
| 212 |
-
weight = 1.3 - (i * 0.1) # Decreasing weight for later segments
|
| 213 |
-
weight = max(0.8, min(1.5, weight))
|
| 214 |
-
supporting_context += f" ({segment}:{weight:.1f})"
|
| 215 |
-
|
| 216 |
-
final_prompt = f"{main_prompt}.{supporting_context}. masterpiece, best quality, 4K"
|
| 217 |
-
return final_prompt
|
| 218 |
-
|
| 219 |
-
def extract_key_phrases(prompt, max_phrases=10):
|
| 220 |
-
"""
|
| 221 |
-
Extract the most important phrases from very long prompts
|
| 222 |
-
"""
|
| 223 |
-
# Simple heuristic: nouns, adjectives, and verbs are important
|
| 224 |
-
words = prompt.split()
|
| 225 |
-
important_words = []
|
| 226 |
-
|
| 227 |
-
# Prioritize words after colons, in parentheses, or quoted
|
| 228 |
-
for i, word in enumerate(words):
|
| 229 |
-
if (':' in word or '(' in word or '[' in word or
|
| 230 |
-
word.isupper() or (i > 0 and words[i-1][-1] == ':')):
|
| 231 |
-
important_words.append(word)
|
| 232 |
-
|
| 233 |
-
# Also take first few words of each sentence
|
| 234 |
-
sentences = prompt.split('.')
|
| 235 |
-
for sentence in sentences:
|
| 236 |
-
first_words = sentence.strip().split()[:3]
|
| 237 |
-
important_words.extend(first_words)
|
| 238 |
-
|
| 239 |
-
# Remove duplicates and limit
|
| 240 |
-
important_words = list(set(important_words))[:max_phrases]
|
| 241 |
-
return " ".join(important_words)
|
| 242 |
-
|
| 243 |
-
def enhance_prompt(scene_visual, characters, style="childrens_book", page_number=1):
|
| 244 |
-
"""Create comprehensive prompt with NO length limits"""
|
| 245 |
-
|
| 246 |
-
# Character context - include ALL details
|
| 247 |
-
character_context = ""
|
| 248 |
if characters:
|
| 249 |
-
char_descriptions = []
|
| 250 |
for char in characters:
|
| 251 |
if hasattr(char, 'description'):
|
| 252 |
-
|
| 253 |
elif isinstance(char, dict):
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
-
#
|
| 259 |
-
continuity_context = f"
|
| 260 |
|
| 261 |
-
# Style templates
|
| 262 |
style_presets = {
|
| 263 |
-
"childrens_book": "children's book illustration, watercolor
|
| 264 |
-
"realistic": "photorealistic, professional photography
|
| 265 |
-
"fantasy": "fantasy art, digital painting, magical
|
| 266 |
-
"anime": "anime style,
|
| 267 |
}
|
| 268 |
|
| 269 |
style_prompt = style_presets.get(style, style_presets["childrens_book"])
|
| 270 |
|
| 271 |
-
# Build
|
| 272 |
-
|
| 273 |
-
{continuity_context}
|
| 274 |
-
{scene_visual}.
|
| 275 |
-
{character_context}
|
| 276 |
-
Art style: {style_prompt}.
|
| 277 |
-
Technical quality: masterpiece, best quality, 4K resolution, ultra detailed,
|
| 278 |
-
professional artwork, award winning, trending on artstation, perfect composition,
|
| 279 |
-
ideal lighting, beautiful colors, no errors, perfect anatomy, consistent style
|
| 280 |
-
"""
|
| 281 |
|
| 282 |
-
|
| 283 |
-
|
| 284 |
|
| 285 |
-
|
| 286 |
|
| 287 |
-
#
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
-
print(f"π
|
|
|
|
| 294 |
|
| 295 |
# Negative prompt
|
| 296 |
negative_prompt = (
|
|
@@ -299,11 +224,10 @@ def enhance_prompt(scene_visual, characters, style="childrens_book", page_number
|
|
| 299 |
"disconnected limbs, mutation, mutated, disgusting, bad art, "
|
| 300 |
"beginner, amateur, distorted, watermark, signature, text, username, "
|
| 301 |
"multiple people, crowd, group, different characters, inconsistent features, "
|
| 302 |
-
"changed appearance, different face, altered features, low resolution
|
| 303 |
-
"jpeg artifacts, compression artifacts, noise, grain, out of focus"
|
| 304 |
)
|
| 305 |
|
| 306 |
-
return
|
| 307 |
|
| 308 |
def save_complete_storybook_page(image, story_title, sequence_number, scene_text):
|
| 309 |
try:
|
|
@@ -361,7 +285,7 @@ def generate_storybook_page(scene_visual, story_title, sequence_number, scene_te
|
|
| 361 |
)
|
| 362 |
|
| 363 |
print(f"π Generating page {sequence_number}")
|
| 364 |
-
print(f"π
|
| 365 |
|
| 366 |
if characters:
|
| 367 |
char_names = []
|
|
@@ -379,16 +303,17 @@ def generate_storybook_page(scene_visual, story_title, sequence_number, scene_te
|
|
| 379 |
char_name = first_char.name if hasattr(first_char, 'name') else first_char.get('name', 'unknown')
|
| 380 |
main_char_seed = get_character_seed(story_title, char_name, sequence_number)
|
| 381 |
generator.manual_seed(main_char_seed)
|
|
|
|
| 382 |
else:
|
| 383 |
scene_seed = hash(f"{story_title}_{sequence_number}") % 1000000
|
| 384 |
generator.manual_seed(scene_seed)
|
| 385 |
|
| 386 |
-
# Generate
|
| 387 |
image = current_pipe(
|
| 388 |
prompt=enhanced_prompt,
|
| 389 |
negative_prompt=negative_prompt,
|
| 390 |
-
num_inference_steps=
|
| 391 |
-
guidance_scale=7.
|
| 392 |
width=768,
|
| 393 |
height=768,
|
| 394 |
generator=generator
|
|
|
|
| 18 |
import gc
|
| 19 |
import psutil
|
| 20 |
import threading
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# External OCI API URL
|
| 23 |
OCI_API_BASE_URL = "https://yukee1992-oci-story-book.hf.space"
|
|
|
|
| 51 |
model_choice: str = "sdxl"
|
| 52 |
style: str = "childrens_book"
|
| 53 |
|
| 54 |
+
# MODEL SELECTION - SDXL handles longer prompts better
|
| 55 |
MODEL_CHOICES = {
|
| 56 |
"sdxl": "stabilityai/stable-diffusion-xl-base-1.0",
|
| 57 |
"sdxl-turbo": "stabilityai/sdxl-turbo",
|
|
|
|
| 68 |
character_descriptions = {}
|
| 69 |
character_seeds = {}
|
| 70 |
|
| 71 |
+
# Memory monitoring
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
def monitor_memory():
    """
    Return the resident set size (RSS) of the current process in megabytes.

    This is a best-effort probe: if the measurement fails for any ordinary
    reason, 0.0 is returned instead of raising.

    Returns:
        float: RSS in MB (bytes / 1024 / 1024), or 0.0 when it cannot be read.
    """
    try:
        process = psutil.Process()
        # memory_info().rss is reported in bytes; convert to megabytes.
        return process.memory_info().rss / 1024 / 1024
    except Exception:
        # Deliberately broad (best-effort), but unlike a bare `except:` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return 0.0
|
| 78 |
|
|
|
|
| 129 |
model_cache[model_name] = pipe
|
| 130 |
return pipe
|
| 131 |
|
| 132 |
+
# Initialize default model
|
| 133 |
print("π Initializing Storybook Generator...")
|
|
|
|
| 134 |
current_pipe = load_model("sdxl")
|
| 135 |
+
print("✅ Model loaded and ready!")
|
| 136 |
|
| 137 |
+
# TRUE UNLIMITED PROMPT SOLUTION
|
| 138 |
+
def create_compressed_prompt(scene_visual, characters, style="childrens_book", page_number=1, *, max_words=60):
    """
    Build a short image-generation prompt from a scene and its characters.

    Each character description is reduced to a few keywords (age, colour,
    species, known accessories). The keywords are appended to the scene text
    together with a style preset and fixed quality tags.

    Args:
        scene_visual: Text describing the scene; used verbatim.
        characters: Iterable of characters, or None/empty. Each item may be an
            object with a ``description`` attribute, a dict with a
            ``'description'`` key, or any other value (converted with ``str``).
        style: Key into the style presets; unknown keys fall back to
            ``"childrens_book"``.
        page_number: Pages after the first get a ``"scene N"`` prefix.
        max_words: Word cap for the final prompt (default 60, as before).

    Returns:
        str: The assembled prompt. If it exceeds ``max_words`` words it is cut
        to the first ``max_words`` words and ``'...'`` is appended.
    """
    # Imported here because no module-level `import re` is visible in this file.
    import re

    # Word boundaries stop keywords matching inside other words
    # ("educated" -> cat, "tired" -> red, "beard" -> bear).
    age_re = re.compile(r'(\d+)[\- ]?years?[\- ]?old', re.IGNORECASE)
    species_re = re.compile(
        r'\b(rabbit|hedgehog|bird|dog|cat|fox|bear|dragon|unicorn|human|girl|boy)s?\b',
        re.IGNORECASE,
    )
    color_re = re.compile(
        r'\b(blonde|brown|black|white|blue|red|green|yellow|golden|silver)\b',
        re.IGNORECASE,
    )
    known_accessories = ('glasses', 'dress', 'hat', 'satchel')

    # Extract only the most critical features of each character.
    character_features = []
    for char in characters or ():
        if hasattr(char, 'description'):
            desc = char.description
        elif isinstance(char, dict):
            desc = char.get('description', '')
        else:
            desc = char
        # A missing description must not crash the regex searches below.
        desc = "" if desc is None else str(desc)

        age_match = age_re.search(desc)
        age = f"{age_match.group(1)} year old" if age_match else ""

        species_match = species_re.search(desc)
        species = species_match.group(1) if species_match else "character"

        color_match = color_re.search(desc)
        color = color_match.group(1) if color_match else ""

        # Match accessories as whole words so "that" does not count as "hat".
        desc_words = set(re.findall(r'[a-z]+', desc.lower()))
        accessories = [item for item in known_accessories if item in desc_words]

        # Join only the non-empty parts to avoid doubled or leading spaces.
        compressed_desc = " ".join(part for part in (age, color, species) if part)
        if accessories:
            compressed_desc += f" with {', '.join(accessories)}"

        character_features.append(compressed_desc)

    # Scene context: only pages after the first carry a scene marker.
    continuity_context = f"scene {page_number}" if page_number > 1 else ""

    # Style templates (compressed)
    style_presets = {
        "childrens_book": "children's book illustration, watercolor, whimsical",
        "realistic": "photorealistic, professional photography",
        "fantasy": "fantasy art, digital painting, magical",
        "anime": "anime style, clean lines, vibrant colors"
    }
    style_prompt = style_presets.get(style, style_presets["childrens_book"])

    # Build the final prompt; skip the prefix entirely when it is empty so the
    # result does not start with a stray space.
    if continuity_context:
        compressed_prompt = f"{continuity_context} {scene_visual}"
    else:
        compressed_prompt = f"{scene_visual}"

    if character_features:
        compressed_prompt += f". Characters: {', '.join(character_features)}"

    compressed_prompt += f". Style: {style_prompt}. masterpiece, best quality, 4K"

    # Keep the prompt within the word budget.
    # NOTE(review): truncation cuts from the end, so a very long scene text
    # pushes the character and style suffix out of the prompt - confirm this
    # is the intended trade-off.
    words = compressed_prompt.split()
    if len(words) > max_words:
        compressed_prompt = ' '.join(words[:max_words]) + '...'

    return compressed_prompt
|
| 209 |
+
|
| 210 |
+
def enhance_prompt(scene_visual, characters, style="childrens_book", page_number=1):
|
| 211 |
+
"""
|
| 212 |
+
Create optimized prompt that preserves essence while fitting token limits
|
| 213 |
+
"""
|
| 214 |
+
# Use compressed prompt for the actual generation
|
| 215 |
+
main_prompt = create_compressed_prompt(scene_visual, characters, style, page_number)
|
| 216 |
|
| 217 |
+
print(f"π Compressed prompt: {main_prompt}")
|
| 218 |
+
print(f"π Length: {len(main_prompt.split())} words")
|
| 219 |
|
| 220 |
# Negative prompt
|
| 221 |
negative_prompt = (
|
|
|
|
| 224 |
"disconnected limbs, mutation, mutated, disgusting, bad art, "
|
| 225 |
"beginner, amateur, distorted, watermark, signature, text, username, "
|
| 226 |
"multiple people, crowd, group, different characters, inconsistent features, "
|
| 227 |
+
"changed appearance, different face, altered features, low resolution"
|
|
|
|
| 228 |
)
|
| 229 |
|
| 230 |
+
return main_prompt, negative_prompt
|
| 231 |
|
| 232 |
def save_complete_storybook_page(image, story_title, sequence_number, scene_text):
|
| 233 |
try:
|
|
|
|
| 285 |
)
|
| 286 |
|
| 287 |
print(f"π Generating page {sequence_number}")
|
| 288 |
+
print(f"π Using prompt: {enhanced_prompt}")
|
| 289 |
|
| 290 |
if characters:
|
| 291 |
char_names = []
|
|
|
|
| 303 |
char_name = first_char.name if hasattr(first_char, 'name') else first_char.get('name', 'unknown')
|
| 304 |
main_char_seed = get_character_seed(story_title, char_name, sequence_number)
|
| 305 |
generator.manual_seed(main_char_seed)
|
| 306 |
+
print(f"π± Using seed {main_char_seed} for {char_name}")
|
| 307 |
else:
|
| 308 |
scene_seed = hash(f"{story_title}_{sequence_number}") % 1000000
|
| 309 |
generator.manual_seed(scene_seed)
|
| 310 |
|
| 311 |
+
# Generate image
|
| 312 |
image = current_pipe(
|
| 313 |
prompt=enhanced_prompt,
|
| 314 |
negative_prompt=negative_prompt,
|
| 315 |
+
num_inference_steps=35,
|
| 316 |
+
guidance_scale=7.5,
|
| 317 |
width=768,
|
| 318 |
height=768,
|
| 319 |
generator=generator
|