yukee1992 committed on
Commit
b05c170
Β·
verified Β·
1 Parent(s): 2111d34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -152
app.py CHANGED
@@ -18,8 +18,6 @@ import random
18
  import gc
19
  import psutil
20
  import threading
21
- from transformers import CLIPTokenizer, CLIPTextModel
22
- import numpy as np
23
 
24
  # External OCI API URL
25
  OCI_API_BASE_URL = "https://yukee1992-oci-story-book.hf.space"
@@ -53,7 +51,7 @@ class StorybookRequest(BaseModel):
53
  model_choice: str = "sdxl"
54
  style: str = "childrens_book"
55
 
56
- # MODEL SELECTION
57
  MODEL_CHOICES = {
58
  "sdxl": "stabilityai/stable-diffusion-xl-base-1.0",
59
  "sdxl-turbo": "stabilityai/sdxl-turbo",
@@ -70,27 +68,11 @@ current_pipe = None
70
  character_descriptions = {}
71
  character_seeds = {}
72
 
73
- # CLIP tokenizer for long prompt handling
74
- clip_tokenizer = None
75
- clip_model = None
76
-
77
- def initialize_clip():
78
- """Initialize CLIP for long prompt processing"""
79
- global clip_tokenizer, clip_model
80
- try:
81
- clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
82
- clip_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
83
- print("βœ… CLIP model loaded for long prompt processing")
84
- except Exception as e:
85
- print(f"❌ CLIP loading failed: {e}")
86
-
87
- # Memory monitoring function
88
  def monitor_memory():
89
  try:
90
  process = psutil.Process()
91
- memory_usage = process.memory_info().rss / 1024 / 1024
92
- print(f"πŸ“Š Memory usage: {memory_usage:.2f} MB")
93
- return memory_usage
94
  except:
95
  return 0
96
 
@@ -147,150 +129,93 @@ def load_model(model_name="sdxl"):
147
  model_cache[model_name] = pipe
148
  return pipe
149
 
150
- # Initialize CLIP and default model
151
  print("πŸš€ Initializing Storybook Generator...")
152
- initialize_clip()
153
  current_pipe = load_model("sdxl")
154
- print("βœ… Models loaded and ready!")
155
 
156
- # ADVANCED LONG PROMPT HANDLING
157
- def segment_long_prompt(long_prompt, max_tokens=75):
158
  """
159
- Split long prompt into meaningful segments using CLIP tokenization
160
- and semantic analysis
161
  """
162
- if clip_tokenizer is None:
163
- # Fallback: simple sentence splitting
164
- sentences = [s.strip() for s in long_prompt.split('.') if s.strip()]
165
- return sentences
166
-
167
- # Tokenize with CLIP to understand semantic boundaries
168
- tokens = clip_tokenizer(long_prompt, return_tensors="pt", truncation=False)
169
- token_count = tokens.input_ids.shape[1]
170
-
171
- if token_count <= max_tokens:
172
- return [long_prompt]
173
-
174
- print(f"πŸ“ Segmenting very long prompt: {token_count} tokens")
175
-
176
- # Split into sentences first
177
- sentences = [s.strip() for s in long_prompt.split('.') if s.strip()]
178
- segments = []
179
- current_segment = ""
180
-
181
- for sentence in sentences:
182
- test_segment = current_segment + ". " + sentence if current_segment else sentence
183
- test_tokens = clip_tokenizer(test_segment, return_tensors="pt", truncation=False)
184
-
185
- if test_tokens.input_ids.shape[1] <= max_tokens:
186
- current_segment = test_segment
187
- else:
188
- if current_segment:
189
- segments.append(current_segment)
190
- current_segment = sentence
191
-
192
- if current_segment:
193
- segments.append(current_segment)
194
-
195
- return segments
196
-
197
- def create_prompt_hierarchy(full_prompt):
198
- """
199
- Create a hierarchical prompt structure with main focus and supporting details
200
- """
201
- segments = segment_long_prompt(full_prompt)
202
-
203
- if len(segments) == 1:
204
- return full_prompt
205
-
206
- # The first segment is most important (main subject/action)
207
- main_prompt = segments[0]
208
-
209
- # Remaining segments become supporting context with weights
210
- supporting_context = ""
211
- for i, segment in enumerate(segments[1:], 1):
212
- weight = 1.3 - (i * 0.1) # Decreasing weight for later segments
213
- weight = max(0.8, min(1.5, weight))
214
- supporting_context += f" ({segment}:{weight:.1f})"
215
-
216
- final_prompt = f"{main_prompt}.{supporting_context}. masterpiece, best quality, 4K"
217
- return final_prompt
218
-
219
- def extract_key_phrases(prompt, max_phrases=10):
220
- """
221
- Extract the most important phrases from very long prompts
222
- """
223
- # Simple heuristic: nouns, adjectives, and verbs are important
224
- words = prompt.split()
225
- important_words = []
226
-
227
- # Prioritize words after colons, in parentheses, or quoted
228
- for i, word in enumerate(words):
229
- if (':' in word or '(' in word or '[' in word or
230
- word.isupper() or (i > 0 and words[i-1][-1] == ':')):
231
- important_words.append(word)
232
-
233
- # Also take first few words of each sentence
234
- sentences = prompt.split('.')
235
- for sentence in sentences:
236
- first_words = sentence.strip().split()[:3]
237
- important_words.extend(first_words)
238
-
239
- # Remove duplicates and limit
240
- important_words = list(set(important_words))[:max_phrases]
241
- return " ".join(important_words)
242
-
243
- def enhance_prompt(scene_visual, characters, style="childrens_book", page_number=1):
244
- """Create comprehensive prompt with NO length limits"""
245
-
246
- # Character context - include ALL details
247
- character_context = ""
248
  if characters:
249
- char_descriptions = []
250
  for char in characters:
251
  if hasattr(char, 'description'):
252
- char_descriptions.append(char.description)
253
  elif isinstance(char, dict):
254
- char_descriptions.append(char.get('description', ''))
255
- character_context = " ".join(char_descriptions)
256
- character_context = f"Character details: {character_context}."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- # Scene continuity context
259
- continuity_context = f"Scene {page_number}, " if page_number > 1 else ""
260
 
261
- # Style templates
262
  style_presets = {
263
- "childrens_book": "children's book illustration, watercolor style, whimsical, charming, vibrant colors, soft lighting, storybook art, detailed backgrounds, cute characters, magical atmosphere",
264
- "realistic": "photorealistic, professional photography, natural lighting, detailed, sharp focus, high resolution, realistic textures, studio quality, cinematic lighting",
265
- "fantasy": "fantasy art, digital painting, magical, epic, concept art, dramatic lighting, mystical, otherworldly, detailed environments, heroic",
266
- "anime": "anime style, Japanese animation, clean lines, vibrant colors, cel shading, detailed eyes, dynamic poses, manga style, professional animation"
267
  }
268
 
269
  style_prompt = style_presets.get(style, style_presets["childrens_book"])
270
 
271
- # Build COMPREHENSIVE prompt with ALL details
272
- full_prompt = f"""
273
- {continuity_context}
274
- {scene_visual}.
275
- {character_context}
276
- Art style: {style_prompt}.
277
- Technical quality: masterpiece, best quality, 4K resolution, ultra detailed,
278
- professional artwork, award winning, trending on artstation, perfect composition,
279
- ideal lighting, beautiful colors, no errors, perfect anatomy, consistent style
280
- """
281
 
282
- # Clean up the prompt
283
- full_prompt = ' '.join(full_prompt.split()) # Remove extra whitespace
284
 
285
- print(f"πŸ“ Raw prompt length: {len(full_prompt.split())} words")
286
 
287
- # Use hierarchical prompt creation for very long prompts
288
- if len(full_prompt.split()) > 100:
289
- optimized_prompt = create_prompt_hierarchy(full_prompt)
290
- else:
291
- optimized_prompt = full_prompt
 
 
 
 
 
 
 
 
292
 
293
- print(f"πŸ“ Final prompt length: {len(optimized_prompt.split())} words")
 
294
 
295
  # Negative prompt
296
  negative_prompt = (
@@ -299,11 +224,10 @@ def enhance_prompt(scene_visual, characters, style="childrens_book", page_number
299
  "disconnected limbs, mutation, mutated, disgusting, bad art, "
300
  "beginner, amateur, distorted, watermark, signature, text, username, "
301
  "multiple people, crowd, group, different characters, inconsistent features, "
302
- "changed appearance, different face, altered features, low resolution, "
303
- "jpeg artifacts, compression artifacts, noise, grain, out of focus"
304
  )
305
 
306
- return optimized_prompt, negative_prompt
307
 
308
  def save_complete_storybook_page(image, story_title, sequence_number, scene_text):
309
  try:
@@ -361,7 +285,7 @@ def generate_storybook_page(scene_visual, story_title, sequence_number, scene_te
361
  )
362
 
363
  print(f"πŸ“– Generating page {sequence_number}")
364
- print(f"πŸ“ Prompt preview: {enhanced_prompt[:150]}...")
365
 
366
  if characters:
367
  char_names = []
@@ -379,16 +303,17 @@ def generate_storybook_page(scene_visual, story_title, sequence_number, scene_te
379
  char_name = first_char.name if hasattr(first_char, 'name') else first_char.get('name', 'unknown')
380
  main_char_seed = get_character_seed(story_title, char_name, sequence_number)
381
  generator.manual_seed(main_char_seed)
 
382
  else:
383
  scene_seed = hash(f"{story_title}_{sequence_number}") % 1000000
384
  generator.manual_seed(scene_seed)
385
 
386
- # Generate with SDXL which handles long prompts better
387
  image = current_pipe(
388
  prompt=enhanced_prompt,
389
  negative_prompt=negative_prompt,
390
- num_inference_steps=40, # More steps for better detail
391
- guidance_scale=7.0,
392
  width=768,
393
  height=768,
394
  generator=generator
 
18
  import gc
19
  import psutil
20
  import threading
 
 
21
 
22
  # External OCI API URL
23
  OCI_API_BASE_URL = "https://yukee1992-oci-story-book.hf.space"
 
51
  model_choice: str = "sdxl"
52
  style: str = "childrens_book"
53
 
54
+ # MODEL SELECTION - SDXL handles longer prompts better
55
  MODEL_CHOICES = {
56
  "sdxl": "stabilityai/stable-diffusion-xl-base-1.0",
57
  "sdxl-turbo": "stabilityai/sdxl-turbo",
 
68
  character_descriptions = {}
69
  character_seeds = {}
70
 
71
+ # Memory monitoring
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  def monitor_memory():
73
  try:
74
  process = psutil.Process()
75
+ return process.memory_info().rss / 1024 / 1024
 
 
76
  except:
77
  return 0
78
 
 
129
  model_cache[model_name] = pipe
130
  return pipe
131
 
132
+ # Initialize default model
133
  print("πŸš€ Initializing Storybook Generator...")
 
134
  current_pipe = load_model("sdxl")
135
+ print("βœ… Model loaded and ready!")
136
 
137
+ # TRUE UNLIMITED PROMPT SOLUTION
138
+ def create_compressed_prompt(scene_visual, characters, style="childrens_book", page_number=1):
139
  """
140
+ Create a compressed but comprehensive prompt that fits within token limits
141
+ while preserving ALL important information
142
  """
143
+ # Extract ONLY the most critical character features
144
+ character_features = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  if characters:
 
146
  for char in characters:
147
  if hasattr(char, 'description'):
148
+ desc = char.description
149
  elif isinstance(char, dict):
150
+ desc = char.get('description', '')
151
+ else:
152
+ desc = str(char)
153
+
154
+ # Extract key features: age, appearance, clothing
155
+ import re
156
+ # Get age if mentioned
157
+ age_match = re.search(r'(\d+)[\- ]?year[\- ]?old', desc, re.IGNORECASE)
158
+ age = f"{age_match.group(1)} year old" if age_match else ""
159
+
160
+ # Get species/type
161
+ species_match = re.search(r'(rabbit|hedgehog|bird|dog|cat|fox|bear|dragon|unicorn|human|girl|boy)', desc, re.IGNORECASE)
162
+ species = species_match.group(1) if species_match else "character"
163
+
164
+ # Get color/main features
165
+ color_match = re.search(r'(blonde|brown|black|white|blue|red|green|yellow|golden|silver)', desc, re.IGNORECASE)
166
+ color = color_match.group(1) if color_match else ""
167
+
168
+ # Get key accessories
169
+ accessories = []
170
+ if 'glasses' in desc.lower(): accessories.append('glasses')
171
+ if 'dress' in desc.lower(): accessories.append('dress')
172
+ if 'hat' in desc.lower(): accessories.append('hat')
173
+ if 'satchel' in desc.lower(): accessories.append('satchel')
174
+
175
+ # Build compressed description
176
+ compressed_desc = f"{age} {color} {species}".strip()
177
+ if accessories:
178
+ compressed_desc += f" with {', '.join(accessories)}"
179
+
180
+ character_features.append(compressed_desc)
181
 
182
+ # Build scene context
183
+ continuity_context = f"scene {page_number}" if page_number > 1 else ""
184
 
185
+ # Style templates (compressed)
186
  style_presets = {
187
+ "childrens_book": "children's book illustration, watercolor, whimsical",
188
+ "realistic": "photorealistic, professional photography",
189
+ "fantasy": "fantasy art, digital painting, magical",
190
+ "anime": "anime style, clean lines, vibrant colors"
191
  }
192
 
193
  style_prompt = style_presets.get(style, style_presets["childrens_book"])
194
 
195
+ # Build the final compressed prompt
196
+ compressed_prompt = f"{continuity_context} {scene_visual}"
 
 
 
 
 
 
 
 
197
 
198
+ if character_features:
199
+ compressed_prompt += f". Characters: {', '.join(character_features)}"
200
 
201
+ compressed_prompt += f". Style: {style_prompt}. masterpiece, best quality, 4K"
202
 
203
+ # Ensure it's within reasonable length
204
+ words = compressed_prompt.split()
205
+ if len(words) > 60:
206
+ compressed_prompt = ' '.join(words[:60]) + '...'
207
+
208
+ return compressed_prompt
209
+
210
+ def enhance_prompt(scene_visual, characters, style="childrens_book", page_number=1):
211
+ """
212
+ Create optimized prompt that preserves essence while fitting token limits
213
+ """
214
+ # Use compressed prompt for the actual generation
215
+ main_prompt = create_compressed_prompt(scene_visual, characters, style, page_number)
216
 
217
+ print(f"πŸ“ Compressed prompt: {main_prompt}")
218
+ print(f"πŸ“ Length: {len(main_prompt.split())} words")
219
 
220
  # Negative prompt
221
  negative_prompt = (
 
224
  "disconnected limbs, mutation, mutated, disgusting, bad art, "
225
  "beginner, amateur, distorted, watermark, signature, text, username, "
226
  "multiple people, crowd, group, different characters, inconsistent features, "
227
+ "changed appearance, different face, altered features, low resolution"
 
228
  )
229
 
230
+ return main_prompt, negative_prompt
231
 
232
  def save_complete_storybook_page(image, story_title, sequence_number, scene_text):
233
  try:
 
285
  )
286
 
287
  print(f"πŸ“– Generating page {sequence_number}")
288
+ print(f"πŸ“ Using prompt: {enhanced_prompt}")
289
 
290
  if characters:
291
  char_names = []
 
303
  char_name = first_char.name if hasattr(first_char, 'name') else first_char.get('name', 'unknown')
304
  main_char_seed = get_character_seed(story_title, char_name, sequence_number)
305
  generator.manual_seed(main_char_seed)
306
+ print(f"🌱 Using seed {main_char_seed} for {char_name}")
307
  else:
308
  scene_seed = hash(f"{story_title}_{sequence_number}") % 1000000
309
  generator.manual_seed(scene_seed)
310
 
311
+ # Generate image
312
  image = current_pipe(
313
  prompt=enhanced_prompt,
314
  negative_prompt=negative_prompt,
315
+ num_inference_steps=35,
316
+ guidance_scale=7.5,
317
  width=768,
318
  height=768,
319
  generator=generator