garyuzair committed
Commit ad6d387 · verified · 1 Parent(s): eb074a1

Upload 6 files
Files changed (5):
  1. animator.py +37 -13
  2. app.py +175 -51
  3. image_generator.py +208 -279
  4. transcriber.py +33 -17
  5. video_creator.py +17 -6
animator.py CHANGED

@@ -10,11 +10,16 @@ class Animator:
     def __init__(self):
         self.frame_cache = {}
         self.aspect_ratio = "1:1"  # Default aspect ratio
-
+        self.frames_per_animation = 15  # Default number of frames per animation for smoother transitions
+
     def set_aspect_ratio(self, aspect_ratio):
         """Set the aspect ratio for animations"""
         self.aspect_ratio = aspect_ratio
 
+    def set_frames_per_animation(self, frames):
+        """Set the number of frames per animation"""
+        self.frames_per_animation = max(10, min(frames, 20))  # Keep between 10 and 20 frames for balance
+
     def apply_cinematic_effects(self, image):
         """Apply cinematic effects to enhance the frame quality"""
         try:
@@ -65,8 +70,11 @@
             return Image.open(image)
         return image
 
-    def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
+    def add_zoom_animation(self, image_path, num_frames=None, zoom_factor=1.05, output_dir="temp"):
         """Add a simple zoom animation to an image with cinematic effects"""
+        if num_frames is None:
+            num_frames = self.frames_per_animation
+
         # Check cache first
         cache_key = f"zoom_{image_path}_{num_frames}_{zoom_factor}_{self.aspect_ratio}"
         if cache_key in self.frame_cache:
@@ -102,8 +110,11 @@
         self.frame_cache[cache_key] = frames
         return frames
 
-    def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
+    def add_pan_animation(self, image_path, num_frames=None, direction="right", output_dir="temp"):
         """Add a simple panning animation to an image with cinematic effects"""
+        if num_frames is None:
+            num_frames = self.frames_per_animation
+
         # Check cache first
         cache_key = f"pan_{image_path}_{num_frames}_{direction}_{self.aspect_ratio}"
         if cache_key in self.frame_cache:
@@ -165,8 +176,11 @@
         self.frame_cache[cache_key] = frames
         return frames
 
-    def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
+    def add_fade_animation(self, image_path, num_frames=None, fade_type="in", output_dir="temp"):
         """Add a fade in/out animation to an image with cinematic effects"""
+        if num_frames is None:
+            num_frames = self.frames_per_animation
+
         # Check cache first
         cache_key = f"fade_{image_path}_{num_frames}_{fade_type}_{self.aspect_ratio}"
         if cache_key in self.frame_cache:
@@ -207,8 +221,11 @@
         self.frame_cache[cache_key] = frames
         return frames
 
-    def add_ken_burns_effect(self, image_path, num_frames=10, output_dir="temp"):
+    def add_ken_burns_effect(self, image_path, num_frames=None, output_dir="temp"):
         """Add a Ken Burns effect (combination of pan and zoom) with cinematic effects"""
+        if num_frames is None:
+            num_frames = self.frames_per_animation
+
         # Check cache first
         cache_key = f"kenburns_{image_path}_{num_frames}_{self.aspect_ratio}"
         if cache_key in self.frame_cache:
@@ -279,8 +296,11 @@
         self.frame_cache[cache_key] = frames
         return frames
 
-    def animate_single_image(self, img_path, animation_type="random", output_dir="temp"):
+    def animate_single_image(self, img_path, animation_type="random", output_dir="temp", num_frames=None):
         """Animate a single image with cinematic effects"""
+        if num_frames is None:
+            num_frames = self.frames_per_animation
+
         # Choose animation type
         animation_types = ["zoom", "pan_right", "pan_left", "fade_in", "ken_burns"]
 
@@ -302,21 +322,24 @@
 
         # Apply the chosen animation
         if chosen_type == "ken_burns":
-            frames = self.add_ken_burns_effect(img_path, output_dir=output_dir)
+            frames = self.add_ken_burns_effect(img_path, num_frames=num_frames, output_dir=output_dir)
         elif chosen_type.startswith("pan"):
             direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
-            frames = self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
+            frames = self.add_pan_animation(img_path, num_frames=num_frames, direction=direction, output_dir=output_dir)
         elif chosen_type.startswith("fade"):
            fade_type = chosen_type.split("_")[1] if "_" in chosen_type else "in"
-            frames = self.add_fade_animation(img_path, fade_type=fade_type, output_dir=output_dir)
+            frames = self.add_fade_animation(img_path, num_frames=num_frames, fade_type=fade_type, output_dir=output_dir)
         else:  # Default to zoom
-            frames = self.add_zoom_animation(img_path, output_dir=output_dir)
+            frames = self.add_zoom_animation(img_path, num_frames=num_frames, output_dir=output_dir)
 
         return frames
 
     def animate_images(self, image_paths, animation_type="random", output_dir="temp",
-                       progress_callback=None, parallel=False, max_workers=4, batch_size=2):
+                       progress_callback=None, parallel=False, max_workers=4, batch_size=2, num_frames=None):
         """Add animations to a list of images with parallel processing and batching"""
+        if num_frames is None:
+            num_frames = self.frames_per_animation
+
         all_animated_frames = []
 
         if parallel and len(image_paths) > 1:
@@ -325,7 +348,8 @@
             # Create a partial function with fixed parameters
             animate_func = partial(self.animate_single_image,
                                    animation_type=animation_type,
-                                   output_dir=output_dir)
+                                   output_dir=output_dir,
+                                   num_frames=num_frames)
 
             # Process images in parallel
             if progress_callback:
@@ -343,7 +367,7 @@
 
                 batch_frames = []
                 for img_path in batch:
-                    frames = self.animate_single_image(img_path, animation_type, output_dir)
+                    frames = self.animate_single_image(img_path, animation_type, output_dir, num_frames)
                     batch_frames.append(frames)
 
                 all_animated_frames.extend(batch_frames)
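
Reviewer note: a minimal usage sketch of the new frame-count plumbing (the default of 15 and the clamp to the 10-20 range come from this diff; the image path is a hypothetical placeholder):

    from animator import Animator

    animator = Animator()
    animator.set_aspect_ratio("16:9")
    animator.set_frames_per_animation(18)  # the setter clamps into [10, 20]

    # num_frames now falls back to animator.frames_per_animation when omitted
    frames = animator.animate_single_image("temp/image_001.png", animation_type="ken_burns")

    # An explicit num_frames still overrides the instance default
    frames = animator.add_zoom_animation("temp/image_001.png", num_frames=12)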
app.py CHANGED

@@ -7,6 +7,7 @@ from functools import partial
 import torch
 import hashlib
 from PIL import Image, ImageDraw
+import gc
 
 from transcriber import AudioTranscriber
 from prompt_generator import PromptGenerator
@@ -128,7 +129,17 @@ def generate_prompt_for_segment(transcription, prompt_generator, aspect_ratio="1:1"):
 def generate_image_for_prompt(prompt, image_generator):
     """Generate an image for a single prompt in parallel"""
     try:
-        return image_generator.generate_image(prompt)
+        # Force garbage collection before generating each image
+        gc.collect()
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
+        image_path = image_generator.generate_image(prompt)
+
+        # Force garbage collection after generating each image
+        gc.collect()
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
+        return image_path
     except Exception as e:
         st.warning(f"Error generating image: {str(e)}. Using fallback image.")
         # Create a fallback image
@@ -140,10 +151,10 @@ def generate_image_for_prompt(prompt, image_generator):
     img.save(path)
     return path
 
-def animate_image(image_path, animator, animation_type="random"):
+def animate_image(image_path, animator, animation_type="random", num_frames=15):
     """Animate a single image in parallel"""
     try:
-        return animator.animate_single_image(image_path, animation_type)
+        return animator.animate_single_image(image_path, animation_type, num_frames=num_frames)
     except Exception as e:
         st.warning(f"Error animating image: {str(e)}. Using static frames.")
         # Create a sequence of identical frames as fallback
@@ -197,17 +208,44 @@ def main():
                                     help="Number of simultaneous tasks (higher values may use more memory)")
             use_caching = st.toggle("Enable result caching", value=True,
                                     help="Save results to speed up repeated conversions")
+
+            # Memory optimization settings
+            memory_optimization = st.toggle("Enable memory optimization", value=True,
+                                            help="Reduce memory usage (recommended for Hugging Face Spaces)")
 
         # Content settings
         st.markdown("### 🎨 Content")
         with st.expander("Segmentation", expanded=True):
-            num_segments = st.slider("Number of segments", min_value=2, max_value=10, value=5,
-                                     help="How many scenes to create in your video")
+            # New setting for maximum segment duration
+            max_segment_duration = st.slider(
+                "Maximum image duration (seconds)",
+                min_value=1.0,
+                max_value=5.0,
+                value=5.0,
+                step=0.5,
+                help="Maximum time each image will stay on screen (5 seconds or less)"
+            )
+
+            # Adjust number of segments based on max duration
+            st.info("More images will be created to ensure each stays under the maximum duration")
+
+            num_segments = st.slider("Minimum number of segments", min_value=2, max_value=20, value=5,
+                                     help="Minimum number of scenes to create in your video")
+
             animation_type = st.selectbox(
                 "Animation style",
                 ["random", "zoom", "pan_right", "pan_left", "fade_in", "ken_burns"],
                 help="Choose how images will animate in your video"
             )
+
+            # Animation frames setting
+            frames_per_animation = st.slider(
+                "Animation smoothness",
+                min_value=10,
+                max_value=20,
+                value=15,
+                help="Higher values create smoother animations but may increase processing time"
+            )
 
         # Advanced settings
         st.markdown("### 🔧 Advanced")
@@ -260,6 +298,7 @@ def main():
 
     Optimized for Hugging Face Spaces with:
     - Multiple video formats (16:9, 1:1, 9:16)
+    - Dynamic image timing (5 seconds or less)
     - Parallel processing
     - Memory-efficient models
     - Result caching
@@ -278,7 +317,7 @@ def main():
 
     # Generate a cache key based on the audio file and settings
     audio_bytes = audio_file.getvalue()
-    settings_str = f"{num_segments}_{animation_type}_{base_image_size}_{inference_steps}_{video_quality}_{selected_aspect_ratio}"
+    settings_str = f"{num_segments}_{max_segment_duration}_{animation_type}_{frames_per_animation}_{base_image_size}_{inference_steps}_{video_quality}_{selected_aspect_ratio}_{memory_optimization}"
     cache_key = hashlib.md5((hashlib.md5(audio_bytes).hexdigest() + settings_str).encode()).hexdigest()
 
     # Process button with better styling
@@ -325,6 +364,11 @@ def main():
         status_message = st.empty()
 
         try:
+            # Force garbage collection before starting
+            if memory_optimization:
+                gc.collect()
+                torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
             # Step 1: Initialize components
             status_text.text("Initializing components...")
             status_message.markdown("🔄 **Setting up AI models...**")
@@ -339,6 +383,13 @@ def main():
             animator.set_aspect_ratio(selected_aspect_ratio)
             video_creator.set_aspect_ratio(selected_aspect_ratio)
 
+            # Set maximum segment duration
+            transcriber.set_max_segment_duration(max_segment_duration)
+            video_creator.set_max_segment_duration(max_segment_duration)
+
+            # Set animation frames
+            animator.set_frames_per_animation(frames_per_animation)
+
             # Calculate actual image size based on aspect ratio
             actual_image_size = image_generator.get_size_for_aspect_ratio(base_image_size, selected_aspect_ratio)
 
@@ -359,7 +410,7 @@ def main():
                 import numpy as np
                 audio_segments = [np.zeros(16000) for _ in range(num_segments)]  # 1-second silent segments
                 total_duration = 5 * num_segments  # Assume 5 seconds per segment
-                timestamps = [(i*5, (i+1)*5) for i in range(num_segments)]
+                timestamps = [(i*5, min((i+1)*5, i*5+max_segment_duration)) for i in range(num_segments)]
 
             progress_bar.progress(15)
 
@@ -382,6 +433,11 @@ def main():
                     st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
                     transcriptions.append("")
 
+            # Force garbage collection after transcription
+            if memory_optimization:
+                gc.collect()
+                torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
             # Display transcriptions with better styling
             progress_bar.progress(30)
             st.markdown("### 📝 Transcriptions")
@@ -423,32 +479,50 @@ def main():
                     </div>
                     """, unsafe_allow_html=True)
 
-            # Step 4: Generate images in parallel
+            # Step 4: Generate images in parallel or batches
             status_text.text("Generating images from prompts...")
             status_message.markdown("🎨 **Creating images...**")
-            if parallel_processing:
-                with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-                    # Create a partial function with the image generator
-                    image_func = partial(generate_image_for_prompt, image_generator=image_generator)
-                    # Generate images in parallel
-                    images = list(executor.map(image_func, prompts))
-            else:
+
+            # For memory optimization, process in smaller batches even with parallel processing
+            if memory_optimization:
+                batch_size = 2  # Process only 2 images at a time to conserve memory
                 images = []
-                for i, prompt in enumerate(prompts):
-                    status_text.text(f"Generating image {i+1}/{len(prompts)}...")
-                    try:
-                        img_path = image_generator.generate_image(prompt)
-                        images.append(img_path)
-                    except Exception as e:
-                        st.warning(f"Error generating image: {str(e)}. Using fallback image.")
-                        # Create a fallback image
-                        from PIL import Image, ImageDraw
-                        img = Image.new('RGB', image_generator.target_size, color=(240, 240, 240))
-                        draw = ImageDraw.Draw(img)
-                        draw.text((10, 10), prompt[:50], fill=(0, 0, 0))
-                        path = f"temp/fallback_{int(time.time() * 1000)}.png"
-                        img.save(path)
-                        images.append(path)
+
+                for i in range(0, len(prompts), batch_size):
+                    batch_prompts = prompts[i:i+batch_size]
+                    status_text.text(f"Generating images {i+1}-{min(i+batch_size, len(prompts))}/{len(prompts)}...")
+
+                    if parallel_processing and batch_size > 1:
+                        with concurrent.futures.ThreadPoolExecutor(max_workers=min(batch_size, max_workers)) as executor:
+                            # Create a partial function with the image generator
+                            image_func = partial(generate_image_for_prompt, image_generator=image_generator)
+                            # Generate images in parallel within the batch
+                            batch_images = list(executor.map(image_func, batch_prompts))
+                    else:
+                        batch_images = []
+                        for prompt in batch_prompts:
+                            img_path = generate_image_for_prompt(prompt, image_generator)
+                            batch_images.append(img_path)
+
+                    images.extend(batch_images)
+
+                    # Force garbage collection after each batch
+                    gc.collect()
+                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
+            else:
+                # Standard processing without special memory considerations
+                if parallel_processing:
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                        # Create a partial function with the image generator
+                        image_func = partial(generate_image_for_prompt, image_generator=image_generator)
+                        # Generate images in parallel
+                        images = list(executor.map(image_func, prompts))
+                else:
+                    images = []
+                    for i, prompt in enumerate(prompts):
+                        status_text.text(f"Generating image {i+1}/{len(prompts)}...")
+                        img_path = generate_image_for_prompt(prompt, image_generator)
+                        images.append(img_path)
 
             # Display images with better styling
             progress_bar.progress(60)
@@ -458,32 +532,73 @@ def main():
                 with image_cols[i % len(image_cols)]:
                     st.image(img_path, caption=f"Image {i+1}", use_column_width=True)
 
-            # Step 5: Add animations in parallel
+            # Force garbage collection after image generation
+            if memory_optimization:
+                gc.collect()
+                torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
+            # Step 5: Add animations in parallel or batches
             status_text.text("Adding animations to images...")
             status_message.markdown("✨ **Adding animations...**")
-            if parallel_processing:
-                with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-                    # Create a partial function with the animator and animation type
-                    animate_func = partial(animate_image, animator=animator, animation_type=animation_type)
-                    # Animate images in parallel
-                    animated_frames = list(executor.map(animate_func, images))
-            else:
+
+            # For memory optimization, process in smaller batches
+            if memory_optimization:
+                batch_size = 3  # Process only 3 animations at a time
                 animated_frames = []
-                for i, img_path in enumerate(images):
-                    status_text.text(f"Animating image {i+1}/{len(images)}...")
-                    try:
-                        frames = animator.animate_single_image(img_path, animation_type)
-                        animated_frames.append(frames)
-                    except Exception as e:
-                        st.warning(f"Error animating image: {str(e)}. Using static frames.")
-                        # Create a sequence of identical frames as fallback
-                        frames = []
-                        for _ in range(10):
-                            frames.append(img_path)
+
+                for i in range(0, len(images), batch_size):
+                    batch_images = images[i:i+batch_size]
+                    status_text.text(f"Animating images {i+1}-{min(i+batch_size, len(images))}/{len(images)}...")
+
+                    if parallel_processing and batch_size > 1:
+                        with concurrent.futures.ThreadPoolExecutor(max_workers=min(batch_size, max_workers)) as executor:
+                            # Create a partial function with the animator, animation type, and frames
+                            animate_func = partial(animate_image,
+                                                   animator=animator,
+                                                   animation_type=animation_type,
+                                                   num_frames=frames_per_animation)
+                            # Animate images in parallel within the batch
+                            batch_frames = list(executor.map(animate_func, batch_images))
+                    else:
+                        batch_frames = []
+                        for img_path in batch_images:
+                            frames = animate_image(img_path, animator, animation_type, frames_per_animation)
+                            batch_frames.append(frames)
+
+                    animated_frames.extend(batch_frames)
+
+                    # Force garbage collection after each batch
+                    gc.collect()
+                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
+            else:
+                # Standard processing without special memory considerations
+                if parallel_processing:
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                        # Create a partial function with the animator, animation type, and frames
+                        animate_func = partial(animate_image,
+                                               animator=animator,
+                                               animation_type=animation_type,
+                                               num_frames=frames_per_animation)
+                        # Animate images in parallel
+                        animated_frames = list(executor.map(animate_func, images))
+                else:
+                    animated_frames = []
+                    for i, img_path in enumerate(images):
+                        status_text.text(f"Animating image {i+1}/{len(images)}...")
+                        frames = animator.animate_single_image(
+                            img_path,
+                            animation_type,
+                            num_frames=frames_per_animation
+                        )
                         animated_frames.append(frames)
 
             progress_bar.progress(80)
 
+            # Force garbage collection before video creation
+            if memory_optimization:
+                gc.collect()
+                torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
             # Step 6: Create video
             status_text.text("Creating final video...")
             status_message.markdown("🎬 **Assembling video...**")
@@ -492,7 +607,7 @@ def main():
                 audio_file,
                 segments=transcriptions,
                 timestamps=timestamps,
-                parallel=parallel_processing,
+                parallel=parallel_processing and not memory_optimization,  # Disable parallel for memory optimization
                 max_workers=max_workers
             )
 
@@ -510,7 +625,7 @@ def main():
             output_video = video_creator.optimize_video(
                 output_video,
                 bitrate=bitrate,
-                threads=max_workers
+                threads=2 if memory_optimization else max_workers  # Use fewer threads for memory optimization
             )
 
             # Cache the result if caching is enabled
@@ -541,7 +656,10 @@ def main():
             st.markdown("### ⏱️ Performance Metrics")
             st.info(f"""
             - Video Format: {aspect_ratio}
+            - Max Image Duration: {max_segment_duration} seconds
+            - Number of Segments: {len(audio_segments)}
             - Parallel Processing: {'Enabled' if parallel_processing else 'Disabled'}
+            - Memory Optimization: {'Enabled' if memory_optimization else 'Disabled'}
             - Workers: {max_workers}
             - Image Size: {actual_image_size[0]}x{actual_image_size[1]}
             - Inference Steps: {inference_steps}
@@ -557,6 +675,11 @@ def main():
             except:
                 pass
 
+            # Final garbage collection
+            if memory_optimization:
+                gc.collect()
+                torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
             status_text.text("All done! Your video is ready for download.")
 
         except Exception as e:
@@ -566,9 +689,10 @@ def main():
             # Provide troubleshooting tips
             st.markdown("### 🔧 Troubleshooting Tips")
             st.info("""
-            - Try reducing the number of segments
+            - Try enabling memory optimization
             - Use a smaller image size
             - Reduce inference steps
+            - Reduce the number of segments
            - Make sure your audio file is in a supported format
            - Clear the cache and try again
            """)
image_generator.py CHANGED

@@ -1,104 +1,53 @@
 import streamlit as st
-import torch
 import os
-import numpy as np
-from PIL import Image, ImageEnhance, ImageFilter
+import tempfile
+from PIL import Image
+import torch
 import time
-from concurrent.futures import ThreadPoolExecutor
-from functools import partial
+import numpy as np
+import gc
 
 class ImageGenerator:
     def __init__(self):
         self.model = None
+        self.processor = None
+        self.target_size = (512, 512)
         self.inference_steps = 20
-        self.target_size = (384, 384)
+        self.guidance_scale = 7.5
         self.aspect_ratio = "1:1"  # Default aspect ratio
+        self.image_cache = {}
 
-    def load_model(self):
-        """Load a lightweight image generation model"""
-        if self.model is None:
-            with st.spinner("Loading image generation model... This may take a moment."):
-                try:
-                    # Using a lightweight model for image generation
-                    from diffusers import StableDiffusionPipeline
-
-                    model_id = "sd-legacy/stable-diffusion-v1-5"
-
-                    # Load with memory optimization settings
-                    self.model = StableDiffusionPipeline.from_pretrained(
-                        model_id,
-                        torch_dtype=torch.float32,
-                        safety_checker=None,
-                        requires_safety_checker=False,
-                        low_cpu_mem_usage=True
-                    )
-
-                    # Use CPU for inference to save memory
-                    self.model = self.model.to("cpu")
-
-                    # Enable memory efficient attention if available
-                    if hasattr(self.model, 'enable_attention_slicing'):
-                        self.model.enable_attention_slicing()
-
-                    # Enable memory efficient attention
-                    if hasattr(self.model, 'enable_vae_slicing'):
-                        self.model.enable_vae_slicing()
-
-                    # Enable xformers memory efficient attention if available
-                    try:
-                        if hasattr(self.model, 'enable_xformers_memory_efficient_attention'):
-                            self.model.enable_xformers_memory_efficient_attention()
-                    except:
-                        pass
-                except Exception as e:
-                    st.warning(f"Error loading image generation model: {str(e)}. Using fallback method.")
-                    self.model = None
-
-        return self.model
-
-    def set_inference_steps(self, steps):
-        """Set the number of inference steps"""
-        self.inference_steps = steps
+    def set_aspect_ratio(self, aspect_ratio):
+        """Set the aspect ratio for image generation"""
+        self.aspect_ratio = aspect_ratio
 
     def set_target_size(self, size):
-        """Set the target image size"""
+        """Set the target size for generated images"""
         self.target_size = size
 
-    def set_aspect_ratio(self, aspect_ratio):
-        """Set the aspect ratio for generated images"""
-        self.aspect_ratio = aspect_ratio
-
-        # Update target size based on aspect ratio while maintaining total pixels
-        base_pixels = self.target_size[0] * self.target_size[1]
-
-        if aspect_ratio == "1:1":
-            # Square format
-            side = int(np.sqrt(base_pixels))
-            self.target_size = (side, side)
-        elif aspect_ratio == "16:9":
-            # Landscape format
-            width = int(np.sqrt(base_pixels * 16 / 9))
-            height = int(width * 9 / 16)
-            self.target_size = (width, height)
-        elif aspect_ratio == "9:16":
-            # Portrait format
-            height = int(np.sqrt(base_pixels * 16 / 9))
-            width = int(height * 9 / 16)
-            self.target_size = (width, height)
-
-    def get_size_for_aspect_ratio(self, base_size, aspect_ratio):
-        """Calculate dimensions for a given aspect ratio while maintaining approximate total pixels"""
+    def set_inference_steps(self, steps):
+        """Set the number of inference steps for image generation"""
+        self.inference_steps = steps
+
+    def get_size_for_aspect_ratio(self, base_size, aspect_ratio=None):
+        """Calculate image dimensions based on aspect ratio"""
+        if aspect_ratio is None:
+            aspect_ratio = self.aspect_ratio
+
+        # Calculate base pixels (total pixels in the image)
         base_pixels = base_size[0] * base_size[1]
 
         if aspect_ratio == "1:1":
             # Square format
             side = int(np.sqrt(base_pixels))
+            # Ensure even dimensions for compatibility
+            side = side if side % 2 == 0 else side + 1
             return (side, side)
         elif aspect_ratio == "16:9":
             # Landscape format
             width = int(np.sqrt(base_pixels * 16 / 9))
             height = int(width * 9 / 16)
-            # Ensure dimensions are even numbers for video compatibility
+            # Ensure even dimensions for compatibility
             width = width if width % 2 == 0 else width + 1
             height = height if height % 2 == 0 else height + 1
             return (width, height)
@@ -106,242 +55,222 @@ class ImageGenerator:
             # Portrait format
             height = int(np.sqrt(base_pixels * 16 / 9))
             width = int(height * 9 / 16)
-            # Ensure dimensions are even numbers for video compatibility
+            # Ensure even dimensions for compatibility
             width = width if width % 2 == 0 else width + 1
             height = height if height % 2 == 0 else height + 1
             return (width, height)
         else:
             # Default to original size
             return base_size
 
-    def apply_cinematic_effects(self, image):
-        """Apply cinematic effects to enhance the image quality"""
-        try:
-            # Enhance contrast slightly
-            enhancer = ImageEnhance.Contrast(image)
-            image = enhancer.enhance(1.2)
-
-            # Enhance color saturation slightly
-            enhancer = ImageEnhance.Color(image)
-            image = enhancer.enhance(1.1)
-
-            # Add subtle vignette effect
-            # Create a radial gradient mask
-            mask = Image.new('L', image.size, 255)
-            draw = ImageDraw.Draw(mask)
-
-            width, height = image.size
-            center_x, center_y = width // 2, height // 2
-            max_radius = min(width, height) // 2
-
-            for y in range(height):
-                for x in range(width):
-                    # Calculate distance from center
-                    distance = np.sqrt((x - center_x)**2 + (y - center_y)**2)
-                    # Create vignette effect (darker at edges)
-                    intensity = int(255 * (1 - 0.3 * (distance / max_radius)**2))
-                    mask.putpixel((x, y), intensity)
-
-            # Apply the mask
-            image = Image.composite(image, Image.new('RGB', image.size, (0, 0, 0)), mask)
-
-            # Add subtle film grain
-            grain = Image.effect_noise((image.width, image.height), 10)
-            grain = grain.convert('L')
-            grain = grain.filter(ImageFilter.GaussianBlur(radius=1))
-            image = Image.blend(image, Image.composite(image, Image.new('RGB', image.size, (128, 128, 128)), grain), 0.05)
-
-            return image
-        except Exception as e:
-            # If effects fail, return original image
-            return image
-
-    def generate_image(self, prompt, output_dir="temp"):
-        """Generate a single image from a prompt"""
+    def load_model(self):
+        """Load the image generation model with optimizations for CPU"""
+        if self.model is None:
+            with st.spinner("Loading image generation model..."):
+                try:
+                    # Force garbage collection before loading model
+                    gc.collect()
+                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
+
+                    from diffusers import StableDiffusionPipeline
+
+                    # Use the correct model ID as specified
+                    model_id = "sd-legacy/stable-diffusion-v1-5"
+
+                    # For CPU-only environments like Hugging Face Spaces free tier
+                    self.model = StableDiffusionPipeline.from_pretrained(
+                        model_id,
+                        torch_dtype=torch.float32,  # Use float32 for CPU
+                        safety_checker=None,  # Disable safety checker for speed
+                        low_cpu_mem_usage=True,  # Optimize for low memory
+                        revision="fp16"  # Use fp16 weights but convert to fp32
+                    )
+
+                    # Optimize for CPU
+                    self.model = self.model.to("cpu")
+
+                    # Enable memory efficient attention
+                    if hasattr(self.model, "enable_attention_slicing"):
+                        self.model.enable_attention_slicing(1)
+
+                    # Enable sequential CPU offload if available
+                    if hasattr(self.model, "enable_sequential_cpu_offload"):
+                        self.model.enable_sequential_cpu_offload()
+
+                    # Enable model CPU offloading if available
+                    if hasattr(self.model, "enable_model_cpu_offload"):
+                        self.model.enable_model_cpu_offload()
+
+                    # Use smaller VAE scale factor for memory efficiency
+                    if hasattr(self.model, "vae") and hasattr(self.model.vae, "config"):
+                        if hasattr(self.model.vae.config, "scaling_factor"):
+                            self.model.vae.config.scaling_factor = 0.18215  # Default value, explicitly set
+
+                except Exception as e:
+                    st.warning(f"Error loading image generation model: {str(e)}. Using fallback method.")
+                    self.model = None
+
+        return self.model
+
+    def generate_image(self, prompt, negative_prompt="blurry, bad quality, distorted, disfigured, low resolution"):
+        """Generate an image from a text prompt"""
+        # Generate a cache key based on the prompt and settings
+        import hashlib
+        cache_key = f"{hashlib.md5(prompt.encode()).hexdigest()}_{self.target_size}_{self.inference_steps}_{self.guidance_scale}_{self.aspect_ratio}"
+
+        # Check if result is in cache
+        if cache_key in self.image_cache:
+            return self.image_cache[cache_key]
+
         # Ensure output directory exists
-        os.makedirs(output_dir, exist_ok=True)
+        os.makedirs("temp", exist_ok=True)
 
         try:
             # Load the model if not already loaded
             model = self.load_model()
 
             if model is not None:
-                # Generate image with minimal inference steps to save resources
-                image = model(
-                    prompt,
-                    num_inference_steps=self.inference_steps,
-                    guidance_scale=7.5
-                ).images[0]
+                # Enhance the prompt with aspect ratio-specific details
+                enhanced_prompt = self.enhance_prompt_for_aspect_ratio(prompt)
 
-                # Resize to target size for consistency and performance
-                if image.size != self.target_size:
-                    image = image.resize(self.target_size, Image.LANCZOS)
-
-                # Apply cinematic effects
-                image = self.apply_cinematic_effects(image)
-            else:
-                # Fallback: Create a colored gradient image with text
-                from PIL import Image, ImageDraw, ImageFilter
+                # Force garbage collection before inference
+                gc.collect()
+                torch.cuda.empty_cache() if torch.cuda.is_available() else None
 
-                # Create a base image with gradient background
-                image = Image.new('RGB', self.target_size, color=(240, 240, 240))
-                draw = ImageDraw.Draw(image)
+                # Generate the image
+                with torch.no_grad():  # Disable gradient calculation for memory efficiency
+                    # Use lower precision during inference
+                    with torch.autocast("cpu"):
+                        image = model(
+                            prompt=enhanced_prompt,
+                            negative_prompt=negative_prompt,
+                            num_inference_steps=self.inference_steps,
+                            guidance_scale=self.guidance_scale,
+                            width=self.target_size[0],
+                            height=self.target_size[1]
+                        ).images[0]
 
-                # Create a gradient background
-                for y in range(image.height):
-                    for x in range(image.width):
-                        # Create a simple gradient
-                        r = int(200 + (x * 55 / image.width))
-                        g = int(200 + (y * 55 / image.height))
-                        b = 240
-                        draw.point((x, y), fill=(r, g, b))
+                # Save the image to a temporary file
+                output_path = f"temp/image_{int(time.time() * 1000)}.png"
+                image.save(output_path)
 
-                # Add some noise/texture
-                image = image.filter(ImageFilter.GaussianBlur(radius=1))
+                # Force garbage collection after inference
+                gc.collect()
+                torch.cuda.empty_cache() if torch.cuda.is_available() else None
 
-                # Add text from prompt (truncated)
-                draw = ImageDraw.Draw(image)
-                text = prompt[:50] + "..." if len(prompt) > 50 else prompt
+                # Cache the result
+                self.image_cache[cache_key] = output_path
 
-                # Position text
-                text_width = draw.textlength(text, font=None)
-                text_position = ((image.width - text_width) / 2, image.height / 2)
-
-                # Draw text
-                draw.text(text_position, text, fill=(0, 0, 0))
-
+                return output_path
+            else:
+                # Fallback: Create a simple image with text
+                return self.create_fallback_image(prompt)
         except Exception as e:
             st.warning(f"Error generating image: {str(e)}. Using fallback method.")
-
-            # Fallback: Create a colored gradient image with text
-            from PIL import Image, ImageDraw, ImageFilter
-
-            # Create a base image with gradient background
-            image = Image.new('RGB', self.target_size, color=(240, 240, 240))
-            draw = ImageDraw.Draw(image)
-
-            # Create a gradient background
-            for y in range(image.height):
-                for x in range(image.width):
-                    # Create a simple gradient
-                    r = int(200 + (x * 55 / image.width))
-                    g = int(200 + (y * 55 / image.height))
-                    b = 240
-                    draw.point((x, y), fill=(r, g, b))
-
-            # Add some noise/texture
-            image = image.filter(ImageFilter.GaussianBlur(radius=1))
-
-            # Add text from prompt (truncated)
-            draw = ImageDraw.Draw(image)
-            text = prompt[:50] + "..." if len(prompt) > 50 else prompt
-
-            # Position text
-            text_width = draw.textlength(text, font=None)
-            text_position = ((image.width - text_width) / 2, image.height / 2)
-
-            # Draw text
-            draw.text(text_position, text, fill=(0, 0, 0))
-
-        # Save the image
-        image_path = f"{output_dir}/image_{int(time.time() * 1000)}.png"
-        image.save(image_path)
-
-        return image_path
-
-    def generate_images(self, prompts, output_dir="temp", progress_callback=None, parallel=False, max_workers=4):
-        """Generate images from the prompts"""
-        # Ensure output directory exists
-        os.makedirs(output_dir, exist_ok=True)
-
-        if parallel and len(prompts) > 1:
-            # Generate images in parallel
-            with ThreadPoolExecutor(max_workers=max_workers) as executor:
-                # Create a partial function with fixed parameters
-                generate_func = partial(self.generate_image, output_dir=output_dir)
-
-                # Process prompts in parallel and collect results
-                if progress_callback:
-                    progress_callback("Generating images in parallel...")
-
-                images = list(executor.map(generate_func, prompts))
-        else:
-            # Generate images sequentially
-            images = []
-            for i, prompt in enumerate(prompts):
-                if progress_callback:
-                    progress_callback(f"Generating image {i+1}/{len(prompts)}...")
-
-                image_path = self.generate_image(prompt, output_dir)
-                images.append(image_path)
-
-        return images
-
-    def optimize_image(self, image_path, target_size=None):
-        """Optimize image size for video creation"""
-        if target_size is None:
-            target_size = self.target_size
-
-        img = Image.open(image_path)
-
-        # Resize to target size
-        img = img.resize(target_size, Image.LANCZOS)
-
-        # Apply cinematic effects
-        img = self.apply_cinematic_effects(img)
-
-        # Save optimized image
-        img.save(image_path)
-
-        return image_path
-
-    def optimize_all_images(self, image_paths, target_size=None, parallel=False, max_workers=4):
-        """Optimize all images for video creation"""
-        if target_size is None:
-            target_size = self.target_size
-
-        if parallel and len(image_paths) > 1:
-            # Optimize images in parallel
-            with ThreadPoolExecutor(max_workers=max_workers) as executor:
-                # Create a partial function with fixed parameters
-                optimize_func = partial(self.optimize_image, target_size=target_size)
-
-                # Process images in parallel
-                optimized_paths = list(executor.map(optimize_func, image_paths))
-        else:
-            # Optimize images sequentially
-            optimized_paths = []
-            for path in image_paths:
-                optimized_path = self.optimize_image(path, target_size)
-                optimized_paths.append(optimized_path)
-
-        return optimized_paths
-
-    def batch_generate_images(self, prompts, batch_size=2, output_dir="temp", progress_callback=None):
-        """Generate images in batches to optimize memory usage"""
-        # Ensure output directory exists
-        os.makedirs(output_dir, exist_ok=True)
-
-        images = []
-
-        # Process prompts in batches
-        for i in range(0, len(prompts), batch_size):
-            batch_prompts = prompts[i:i+batch_size]
-
-            if progress_callback:
-                progress_callback(f"Generating batch {i//batch_size + 1}/{(len(prompts) + batch_size - 1)//batch_size}...")
-
-            # Generate images for this batch
-            batch_images = []
-            for j, prompt in enumerate(batch_prompts):
-                image_path = self.generate_image(prompt, output_dir)
-                batch_images.append(image_path)
-
-            # Add batch results to overall results
-            images.extend(batch_images)
-
-            # Clear CUDA cache if using GPU
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-
-        return images
+            return self.create_fallback_image(prompt)
+
+    def enhance_prompt_for_aspect_ratio(self, prompt):
+        """Enhance the prompt based on the selected aspect ratio"""
+        # Base enhancement for all prompts
+        base_enhancement = "hyper realistic, photo realistic, ultra detailed, hyper detailed textures, 8K resolution"
+
+        # Add cinematic lighting
+        lighting_options = [
+            "golden hour glow", "moody overcast", "dramatic lighting",
+            "soft natural light", "cinematic lighting", "film noir shadows"
+        ]
+
+        # Add camera effects
+        camera_effects = [
+            "shallow depth of field", "motion blur", "film grain",
+            "professional photography", "award winning photograph"
+        ]
+
+        # Add environmental details
+        environmental_details = [
+            "atmospheric", "detailed environment", "rich textures",
+            "detailed background", "immersive scene"
+        ]
+
+        # Select enhancements based on aspect ratio
+        import random
+        random.seed(hash(prompt))  # Use prompt as seed for deterministic selection
+
+        selected_lighting = random.choice(lighting_options)
+        selected_effect = random.choice(camera_effects)
+        selected_detail = random.choice(environmental_details)
+
+        # Aspect ratio specific enhancements
+        if self.aspect_ratio == "16:9":
+            # Landscape format - cinematic, wide view
+            aspect_enhancement = "cinematic wide shot, landscape composition, panoramic view"
+        elif self.aspect_ratio == "9:16":
+            # Portrait format - vertical composition
+            aspect_enhancement = "vertical composition, portrait framing, tall perspective"
+        else:
+            # Square format - balanced composition
+            aspect_enhancement = "balanced composition, centered framing, square format"
+
+        # Combine all enhancements
+        enhanced_prompt = f"{prompt}, {base_enhancement}, {selected_lighting}, {selected_effect}, {selected_detail}, {aspect_enhancement}"
+
+        return enhanced_prompt
+
+    def create_fallback_image(self, prompt):
+        """Create a fallback image when model generation fails"""
+        from PIL import Image, ImageDraw, ImageFont
+
+        # Create a gradient background
+        width, height = self.target_size
+        image = Image.new('RGB', (width, height), color=(240, 240, 240))
+        draw = ImageDraw.Draw(image)
+
+        # Add a gradient
+        for y in range(height):
+            r = int(240 * (1 - y / height))
+            g = int(240 * (1 - y / height))
+            b = int(255 * (1 - y / height * 0.5))
+            for x in range(width):
+                draw.point((x, y), fill=(r, g, b))
+
+        # Add text
+        try:
+            # Try to use a nice font if available
+            font = ImageFont.truetype("Arial", 20)
+        except:
+            # Fallback to default font
+            font = ImageFont.load_default()
+
+        # Wrap text to fit width
+        words = prompt.split()
+        lines = []
+        current_line = []
+
+        for word in words:
+            test_line = ' '.join(current_line + [word])
+            # Estimate text width (approximate method)
+            if len(test_line) * 10 < width - 40:  # 10 pixels per character, 20 pixel margin on each side
+                current_line.append(word)
+            else:
+                lines.append(' '.join(current_line))
+                current_line = [word]
+
+        if current_line:
+            lines.append(' '.join(current_line))
+
+        # Draw text
+        y_position = height // 4
+        for line in lines[:8]:  # Limit to 8 lines
+            draw.text((20, y_position), line, fill=(0, 0, 0), font=font)
+            y_position += 30
+
+        # Save the image
+        output_path = f"temp/fallback_{int(time.time() * 1000)}.png"
+        image.save(output_path)
+
+        return output_path
+
+    def clear_cache(self):
+        """Clear the image cache"""
+        self.image_cache = {}
+        return True
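
Reviewer note: one caveat on the new enhance_prompt_for_aspect_ratio. Python's built-in hash() for strings is salted per interpreter process (PYTHONHASHSEED), so random.seed(hash(prompt)) is only deterministic within a single run; across app restarts the same prompt can pick different enhancements (within a run the new image cache already makes repeats moot). If cross-run stability ever matters, a sketch of a process-independent seed:

    import hashlib
    import random

    def stable_choice(options, prompt, salt=""):
        """Pick from options deterministically across processes, seeding from an MD5 digest."""
        seed = int(hashlib.md5((salt + prompt).encode("utf-8")).hexdigest(), 16)
        return random.Random(seed).choice(options)

    # e.g. selected_lighting = stable_choice(lighting_options, prompt, salt="lighting")

The stable_choice helper is hypothetical, not part of this commit; it just shows the digest-seeded alternative.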
transcriber.py CHANGED

@@ -12,6 +12,11 @@ class AudioTranscriber:
         self.model = None
         self.processor = None
         self.transcription_cache = {}
+        self.max_segment_duration = 5.0  # Maximum segment duration in seconds
+
+    def set_max_segment_duration(self, duration):
+        """Set the maximum duration for any segment in seconds"""
+        self.max_segment_duration = duration
 
     def load_model(self):
         """Load a lightweight transcription model"""
@@ -33,8 +38,8 @@
 
         return self.model
 
-    def segment_audio(self, audio_file, num_segments=5, min_segment_duration=3.0):
-        """Segment the audio file into chunks for processing"""
+    def segment_audio(self, audio_file, num_segments=5, min_segment_duration=1.0):
+        """Segment the audio file into chunks for processing with maximum duration limit"""
         # Save the uploaded audio to a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             tmp_file.write(audio_file.getvalue())
@@ -47,21 +52,25 @@
             # Get total duration
             duration = librosa.get_duration(y=y, sr=sr)
 
+            # Calculate ideal number of segments based on max_segment_duration
+            # We want to create enough segments so that each is <= max_segment_duration
+            ideal_segments = max(num_segments, int(duration / self.max_segment_duration) + 1)
+
             # Ensure we don't create segments that are too short
-            actual_segments = min(num_segments, int(duration / min_segment_duration))
-            if actual_segments < 1:
-                actual_segments = 1
+            actual_segments = max(ideal_segments, int(duration / min_segment_duration))
 
             # Calculate segment duration
-            segment_duration = duration / actual_segments
+            segment_duration = min(duration / actual_segments, self.max_segment_duration)
 
             # Create segments
             segments = []
             timestamps = []
 
-            for i in range(actual_segments):
-                start_time = i * segment_duration
-                end_time = min((i + 1) * segment_duration, duration)
+            # Create more segments to ensure each is under max_segment_duration
+            current_time = 0
+            while current_time < duration:
+                start_time = current_time
+                end_time = min(start_time + segment_duration, duration)
 
                 # Convert time to samples
                 start_sample = int(start_time * sr)
@@ -71,6 +80,8 @@
                 segment = y[start_sample:end_sample]
                 segments.append(segment)
                 timestamps.append((start_time, end_time))
+
+                current_time = end_time
 
             return segments, timestamps
 
@@ -82,21 +93,24 @@
                 y, sr = sf.read(audio_path)
                 duration = len(y) / sr
 
+                # Calculate ideal number of segments based on max_segment_duration
+                ideal_segments = max(num_segments, int(duration / self.max_segment_duration) + 1)
+
                 # Ensure we don't create segments that are too short
-                actual_segments = min(num_segments, int(duration / min_segment_duration))
-                if actual_segments < 1:
-                    actual_segments = 1
+                actual_segments = max(ideal_segments, int(duration / min_segment_duration))
 
                 # Calculate segment duration
-                segment_duration = duration / actual_segments
+                segment_duration = min(duration / actual_segments, self.max_segment_duration)
 
                 # Create segments
                 segments = []
                 timestamps = []
 
-                for i in range(actual_segments):
-                    start_time = i * segment_duration
-                    end_time = min((i + 1) * segment_duration, duration)
+                # Create more segments to ensure each is under max_segment_duration
+                current_time = 0
+                while current_time < duration:
+                    start_time = current_time
+                    end_time = min(start_time + segment_duration, duration)
 
                     # Convert time to samples
                     start_sample = int(start_time * sr)
@@ -106,6 +120,8 @@
                     segment = y[start_sample:end_sample]
                     segments.append(segment)
                    timestamps.append((start_time, end_time))
+
+                    current_time = end_time
 
                 return segments, timestamps
 
@@ -113,7 +129,7 @@
             st.error(f"Critical error in audio segmentation: {str(inner_e)}")
             # Last resort: Create dummy segments
             segments = [np.zeros(16000) for _ in range(num_segments)]  # 1-second silent segments
-            timestamps = [(i, i+1) for i in range(num_segments)]
+            timestamps = [(i, min(i+1, i+self.max_segment_duration)) for i in range(num_segments)]
             return segments, timestamps
         finally:
            # Clean up temporary file
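
Reviewer note: a worked example of the new segmentation arithmetic under the defaults in this diff (max_segment_duration=5.0, min_segment_duration=1.0; the 47-second duration is hypothetical):

    duration = 47.0      # seconds of audio
    num_segments = 5     # UI minimum

    ideal_segments = max(num_segments, int(duration / 5.0) + 1)   # max(5, 10) = 10
    actual_segments = max(ideal_segments, int(duration / 1.0))    # max(10, 47) = 47
    segment_duration = min(duration / actual_segments, 5.0)       # min(1.0, 5.0) = 1.0
    # The while-loop then walks 0 -> 47 s in 1.0 s steps, so every segment is <= 5 s.

Note that taking max(...) against duration / min_segment_duration drives the count all the way up to one segment per second, even though the comment above it says the opposite ("don't create segments that are too short"); whether that is intended may be worth a second look.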
video_creator.py CHANGED

@@ -12,11 +12,16 @@ class VideoCreator:
         os.makedirs("outputs", exist_ok=True)
         self.video_cache = {}
         self.aspect_ratio = "1:1"  # Default aspect ratio
+        self.max_segment_duration = 5.0  # Maximum duration for any segment in seconds
 
     def set_aspect_ratio(self, aspect_ratio):
         """Set the aspect ratio for video creation"""
         self.aspect_ratio = aspect_ratio
 
+    def set_max_segment_duration(self, duration):
+        """Set the maximum duration for any segment in seconds"""
+        self.max_segment_duration = duration
+
     def get_video_dimensions(self, base_size=None):
         """Get video dimensions based on aspect ratio"""
         if base_size is None:
@@ -62,6 +67,9 @@
     def create_segment_clip(self, frames, segment_duration, segment_text=None):
         """Create a video clip from frames with optional text overlay"""
         try:
+            # Limit segment duration to max_segment_duration
+            segment_duration = min(segment_duration, self.max_segment_duration)
+
             # Calculate frame duration based on segment duration
             frame_duration = segment_duration / len(frames)
 
@@ -128,7 +136,7 @@
         """Create a video from animated frames synchronized with audio using parallel processing"""
         # Generate a cache key based on inputs
         import hashlib
-        cache_key = f"{hashlib.md5(audio_file.getvalue()).hexdigest()}_{len(animated_frames)}_{self.aspect_ratio}"
+        cache_key = f"{hashlib.md5(audio_file.getvalue()).hexdigest()}_{len(animated_frames)}_{self.aspect_ratio}_{self.max_segment_duration}"
 
         # Check if result is in cache
         if cache_key in self.video_cache:
@@ -146,11 +154,11 @@
 
         # Calculate segment durations
         if timestamps:
-            # Use provided timestamps
-            segment_durations = [end - start for start, end in timestamps]
+            # Use provided timestamps but limit to max_segment_duration
+            segment_durations = [min(end - start, self.max_segment_duration) for start, end in timestamps]
         else:
-            # Distribute evenly
-            segment_durations = [total_duration / len(animated_frames)] * len(animated_frames)
+            # Distribute evenly but limit to max_segment_duration
+            segment_durations = [min(total_duration / len(animated_frames), self.max_segment_duration)] * len(animated_frames)
 
         # Create video clips for each animated segment
         video_clips = []
@@ -182,7 +190,7 @@
             # Fallback: Create a simple clip for each segment
             video_clips = []
             for i, _ in enumerate(animated_frames):
-                segment_duration = segment_durations[min(i, len(segment_durations)-1)]
+                segment_duration = min(segment_durations[min(i, len(segment_durations)-1)], self.max_segment_duration)
                 from moviepy.editor import ColorClip
                 clip = ColorClip(self.get_video_dimensions(), color=(0, 0, 0), duration=segment_duration)
                 video_clips.append(clip)
@@ -192,6 +200,9 @@
         final_clip = concatenate_videoclips(video_clips)
 
         # Set the audio
+        # If the video is shorter than the audio due to max_segment_duration,
+        # we need to trim the audio to match the video duration
+        audio_clip = audio_clip.subclip(0, min(final_clip.duration, audio_clip.duration))
        final_clip = final_clip.set_audio(audio_clip)
 
        # Get target dimensions based on aspect ratio
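
Reviewer note: a sketch of the duration-clamping behavior this diff introduces, using the same moviepy 1.x API the file already imports (subclip/set_audio); "audio.wav" and the clip sizes are hypothetical placeholders:

    from moviepy.editor import AudioFileClip, ColorClip, concatenate_videoclips

    # Each segment is clamped to max_segment_duration (5.0 s here)
    clips = [ColorClip((640, 360), color=(0, 0, 0), duration=min(d, 5.0)) for d in [7.2, 3.1, 6.0]]
    video = concatenate_videoclips(clips)          # 5.0 + 3.1 + 5.0 = 13.1 s of video

    audio = AudioFileClip("audio.wav")             # say 16.3 s long
    audio = audio.subclip(0, min(video.duration, audio.duration))  # trimmed to 13.1 s
    video = video.set_audio(audio)

One consequence worth flagging: clamping segments and then trimming the audio means any narration past the clamped video length is cut off rather than re-timed across extra images.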