File size: 36,210 Bytes
15ca2ca 7323bbb a87d440 ad6d387 15ca2ca 7323bbb 15ca2ca 7323bbb 15ca2ca 7323bbb 15ca2ca 7323bbb fd65cf5 7323bbb fd65cf5 7323bbb a87d440 7323bbb fd65cf5 a87d440 fd65cf5 7323bbb fd65cf5 ad6d387 fd65cf5 7323bbb ad6d387 7323bbb fd65cf5 ad6d387 fd65cf5 7323bbb 15ca2ca 7323bbb 15ca2ca 7323bbb fd65cf5 7323bbb ad6d387 b4d330b 7323bbb ad6d387 b4d330b ad6d387 b4d330b ad6d387 b4d330b ad6d387 7323bbb a87d440 7323bbb ad6d387 15ca2ca 7323bbb fd65cf5 1107cf8 fd65cf5 1107cf8 fd65cf5 15ca2ca 1107cf8 fd65cf5 1107cf8 fd65cf5 1107cf8 fd65cf5 1107cf8 fd65cf5 1107cf8 fd65cf5 15ca2ca 1107cf8 15ca2ca 1107cf8 7323bbb 15ca2ca 1107cf8 15ca2ca 7323bbb fd65cf5 ad6d387 7323bbb 15ca2ca 7323bbb 15ca2ca 7323bbb 3ca10a7 7323bbb fd65cf5 7323bbb 15ca2ca ad6d387 b4d330b ad6d387 dd0a729 b4d330b 3ca10a7 b4d330b 15ca2ca 7323bbb 15ca2ca 7323bbb fd65cf5 dd0a729 ad6d387 fd65cf5 7323bbb fd65cf5 7323bbb 15ca2ca 7323bbb fd65cf5 b4d330b fd65cf5 7323bbb 15ca2ca 7323bbb fd65cf5 15ca2ca fd65cf5 a87d440 ad6d387 dd0a729 ad6d387 a87d440 ad6d387 a87d440 ad6d387 dd0a729 ad6d387 a87d440 ad6d387 a87d440 56f6fcc a87d440 ad6d387 dd0a729 ad6d387 a87d440 ad6d387 dd0a729 ad6d387 a87d440 ad6d387 a87d440 ad6d387 dd0a729 ad6d387 a87d440 b4d330b a87d440 dd0a729 a87d440 ad6d387 a87d440 ad6d387 dd0a729 a87d440 ad6d387 dd0a729 ad6d387 a87d440 ad6d387 a87d440 ad6d387 a87d440 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 |
import streamlit as st
import os
import tempfile
import time
import concurrent.futures
from functools import partial
import torch
import hashlib
from PIL import Image, ImageDraw
import gc
from transcriber import AudioTranscriber
from prompt_generator import PromptGenerator
from image_generator import ImageGenerator
from animator import Animator
from video_creator import VideoCreator
# Set page configuration
# NOTE: st.set_page_config is the first Streamlit call in the script, as
# required before any other st.* call renders output.
st.set_page_config(
    page_title="Audio to Video Converter",
    page_icon="π¬",
    layout="wide"
)
# Create necessary directories
# temp/ holds scratch files, outputs/ finished videos, cache/ ResultCache entries.
os.makedirs("temp", exist_ok=True)
os.makedirs("outputs", exist_ok=True)
os.makedirs("cache", exist_ok=True)
# App title and description with improved styling
st.markdown("""
<div style="text-align: center; background-color: #f0f2f6; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
<h1 style="color: #1E88E5;">π¬ Audio to Video Converter</h1>
<p style="font-size: 18px;">Transform your audio into engaging videos with AI-powered visuals</p>
</div>
""", unsafe_allow_html=True)
# App description with better formatting
st.markdown("""
### How it works:
1. π€ **Upload your audio** - We accept WAV, MP3, and OGG formats
2. π€ **AI transcribes your audio** - Using advanced speech recognition
3. πΌοΈ **Generate images from transcription** - AI creates visuals matching your content
4. β¨ **Add animations** - Bring images to life with smooth transitions
5. π **Synchronize with audio** - Perfectly timed to match your speech
6. π₯ **Download your video** - Ready to share on social media
""")
# Initialize components with caching
@st.cache_resource
def get_transcriber():
    """Return the shared AudioTranscriber; st.cache_resource builds it once per process."""
    return AudioTranscriber()
@st.cache_resource
def get_prompt_generator():
    """Return the shared PromptGenerator; st.cache_resource builds it once per process."""
    return PromptGenerator()
@st.cache_resource
def get_image_generator():
    """Return the shared ImageGenerator; st.cache_resource builds it once per process."""
    return ImageGenerator()
@st.cache_resource
def get_animator():
    """Return the shared Animator; st.cache_resource builds it once per process."""
    return Animator()
@st.cache_resource
def get_video_creator():
    """Return the shared VideoCreator; st.cache_resource builds it once per process."""
    return VideoCreator()
# Cache for storing intermediate results
class ResultCache:
    """Flat, disk-backed cache keyed by arbitrary strings.

    Keys are hashed with MD5 to produce safe filenames; entries are stored
    as pickle files (or any extension the caller chooses, e.g. ".mp4").
    """

    def __init__(self):
        # Every cache entry lives directly inside this single directory.
        self.cache_dir = "cache"
        os.makedirs(self.cache_dir, exist_ok=True)

    def get_cache_path(self, key, extension=".pkl"):
        """Return the on-disk path for *key* with the given *extension*."""
        # Create a hash of the key for the filename
        digest = hashlib.md5(key.encode()).hexdigest()
        return os.path.join(self.cache_dir, digest + extension)

    def exists(self, key, extension=".pkl"):
        """Return True when a cached entry for *key* is present on disk."""
        return os.path.exists(self.get_cache_path(key, extension))

    def save(self, key, data, extension=".pkl"):
        """Pickle *data* under *key* and return the path of the cache file."""
        import pickle
        target = self.get_cache_path(key, extension)
        with open(target, 'wb') as handle:
            pickle.dump(data, handle)
        return target

    def load(self, key, extension=".pkl"):
        """Unpickle and return the entry for *key*, or None when absent."""
        import pickle
        target = self.get_cache_path(key, extension)
        if not os.path.exists(target):
            return None
        with open(target, 'rb') as handle:
            return pickle.load(handle)

    def clear(self):
        """Remove every file and subdirectory inside the cache directory."""
        import shutil
        for entry in os.listdir(self.cache_dir):
            entry_path = os.path.join(self.cache_dir, entry)
            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
            elif os.path.isfile(entry_path):
                os.unlink(entry_path)
# Initialize cache
# Module-level singleton shared across the whole app (and Streamlit reruns).
result_cache = ResultCache()
# Parallel processing functions with error handling
def process_audio_segment(segment, transcriber):
    """Transcribe one audio segment; fall back to an empty string on failure.

    Designed to be mapped over segments by a thread pool, so errors are
    reported via the UI instead of propagating.
    """
    try:
        result = transcriber.transcribe_segment(segment)
    except Exception as e:
        st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
        return ""
    return result
def generate_prompt_for_segment(transcription, prompt_generator, aspect_ratio="16:9"):
    """Build an image prompt for one transcription; on failure return a generic one.

    Designed to be mapped over transcriptions by a thread pool, so errors are
    reported via the UI instead of propagating.
    """
    try:
        prompt = prompt_generator.generate_optimized_prompt(transcription, aspect_ratio)
    except Exception as err:
        st.warning(f"Error generating prompt: {str(err)}. Using fallback prompt.")
        prompt = f"{transcription}, visual scene, detailed, vibrant, cinematic"
    return prompt
def generate_image_for_prompt(prompt, image_generator):
    """Render one prompt to an image file and return its path.

    Frees Python and CUDA memory both before and after generation. If
    generation fails, writes a plain grey placeholder labelled with the
    prompt into temp/ and returns that path instead.
    """
    try:
        # Force garbage collection before generating each image
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        result_path = image_generator.generate_image(prompt)
        # Force garbage collection after generating each image
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return result_path
    except Exception as err:
        st.warning(f"Error generating image: {str(err)}. Using fallback image.")
        # Create a fallback image
        from PIL import Image, ImageDraw
        placeholder = Image.new('RGB', image_generator.target_size, color=(240, 240, 240))
        ImageDraw.Draw(placeholder).text((10, 10), prompt[:50], fill=(0, 0, 0))
        fallback_path = f"temp/fallback_{int(time.time() * 1000)}.png"
        placeholder.save(fallback_path)
        return fallback_path
def animate_image(image_path, animator, animation_type="random", num_frames=15):
    """Animate a single image and return its frame sequence.

    On failure, reports via the UI and returns ten copies of the original
    path so downstream video assembly still has a (static) sequence.
    """
    try:
        return animator.animate_single_image(image_path, animation_type, num_frames=num_frames)
    except Exception as err:
        st.warning(f"Error animating image: {str(err)}. Using static frames.")
        # Create a sequence of identical frames as fallback
        return [image_path] * 10
# Main app flow
def main():
    """Render the full Streamlit UI and run the audio-to-video pipeline.

    Flow: sidebar settings -> upload -> (optional cached result) ->
    transcribe -> prompts -> images -> animation -> video assembly ->
    download. All heavy steps honor the parallel/memory/VRAM toggles.
    """
    # Settings sidebar with improved UI
    with st.sidebar:
        st.markdown("## βοΈ Settings")
        # Video Format Settings
        st.markdown("### πΉ Video Format")
        with st.expander("Aspect Ratio", expanded=True):
            aspect_ratio = st.radio(
                "Select video format",
                options=["16:9 (Landscape)", "1:1 (Square)", "9:16 (Portrait)"],
                index=0,  # Default to landscape
                help="Choose the aspect ratio for your video"
            )
            # Map the selected option to actual aspect ratio
            aspect_ratio_map = {
                "16:9 (Landscape)": "16:9",
                "1:1 (Square)": "1:1",
                "9:16 (Portrait)": "9:16"
            }
            selected_aspect_ratio = aspect_ratio_map[aspect_ratio]
            # Show preview of aspect ratio
            col1, col2 = st.columns([1, 2])
            with col1:
                st.markdown("Preview:")
            with col2:
                if selected_aspect_ratio == "16:9":
                    st.markdown('<div style="background-color: #ddd; width: 160px; height: 90px; border-radius: 5px;"></div>', unsafe_allow_html=True)
                elif selected_aspect_ratio == "1:1":
                    st.markdown('<div style="background-color: #ddd; width: 120px; height: 120px; border-radius: 5px;"></div>', unsafe_allow_html=True)
                elif selected_aspect_ratio == "9:16":
                    st.markdown('<div style="background-color: #ddd; width: 90px; height: 160px; border-radius: 5px;"></div>', unsafe_allow_html=True)
        # Performance settings with better organization
        st.markdown("### π Performance")
        with st.expander("Processing Options", expanded=True):
            parallel_processing = st.toggle("Enable parallel processing", value=True,
                                            help="Process multiple tasks simultaneously for faster results")
            max_workers = st.slider("Max parallel workers", min_value=2, max_value=8, value=4,
                                    help="Number of simultaneous tasks (higher values may use more memory)")
            use_caching = st.toggle("Enable result caching", value=True,
                                    help="Save results to speed up repeated conversions")
            # Memory optimization settings
            memory_optimization = st.toggle("Enable memory optimization", value=True,
                                            help="Reduce memory usage (recommended for Hugging Face Spaces)")
            # VRAM optimization settings
            vram_optimization = st.toggle("Enable VRAM optimization", value=True,
                                          help="Use techniques to reduce VRAM usage on GPU (highly recommended for Hugging Face)")
        # Content settings
        st.markdown("### π¨ Content")
        with st.expander("Segmentation", expanded=True):
            # New setting for maximum segment duration
            max_segment_duration = st.slider(
                "Maximum image duration (seconds)",
                min_value=3.0,
                max_value=5.0,
                value=4.0,
                step=0.5,
                help="Each image will stay on screen between 3-5 seconds for optimal results"
            )
            # Adjust number of segments based on max duration
            st.info("More images will be created to ensure each stays under the maximum duration")
            num_segments = st.slider("Minimum number of segments", min_value=2, max_value=20, value=5,
                                     help="Minimum number of scenes to create in your video")
            animation_type = st.selectbox(
                "Animation style",
                ["random", "zoom", "pan_right", "pan_left", "fade_in", "ken_burns"],
                help="Choose how images will animate in your video"
            )
            # Animation frames setting
            frames_per_animation = st.slider(
                "Animation smoothness",
                min_value=10,
                max_value=20,
                value=15,
                help="Higher values create smoother animations but may increase processing time"
            )
        # Advanced settings
        st.markdown("### π§ Advanced")
        with st.expander("Image Settings"):
            # Using radio buttons for image size
            image_size_option = st.radio(
                "Image Quality",
                options=["Low (256x256)", "Medium (384x384)", "High (512x512)"],
                index=1,  # Default to medium
                help="Higher quality creates better images but takes longer"
            )
            # Map the selected option to base size (actual dimensions will be adjusted for aspect ratio)
            image_size_map = {
                "Low (256x256)": (256, 256),
                "Medium (384x384)": (384, 384),
                "High (512x512)": (512, 512)
            }
            base_image_size = image_size_map[image_size_option]
            inference_steps = st.slider("Generation Detail", min_value=10, max_value=50, value=20,
                                        help="Higher values create more detailed images but take longer")
        with st.expander("Video Settings"):
            video_quality = st.radio(
                "Video Quality",
                options=["Low", "Medium", "High"],
                index=1,  # Default to medium
                help="Higher quality creates larger files"
            )
            # Map quality to bitrate
            bitrate_map = {
                "Low": "800k",
                "Medium": "1200k",
                "High": "2000k"
            }
            bitrate = bitrate_map[video_quality]
        # Clear cache button
        if st.button("π§Ή Clear Cache", help="Remove all cached results to free up disk space"):
            result_cache.clear()
            st.success("Cache cleared successfully!")
        # About section
        st.markdown("---")
        st.markdown("### π About")
        st.markdown("""
        This app uses AI to convert audio to video.
        Optimized for Hugging Face Spaces with:
        - Multiple video formats (16:9, 1:1, 9:16)
        - Dynamic image timing (5 seconds or less)
        - Parallel processing
        - Memory-efficient models
        - Result caching
        - Batch processing
        """)
    # Main content area
    # File uploader with better styling
    st.markdown("### π Upload Your Audio")
    audio_file = st.file_uploader("Select an audio file (WAV, MP3, OGG)", type=["wav", "mp3", "ogg"])
    if audio_file is not None:
        # Display audio player with better styling
        st.markdown("### π΅ Preview Your Audio")
        st.audio(audio_file)
        # Generate a cache key based on the audio file and settings
        # (any settings change produces a new key, so stale videos are never reused)
        audio_bytes = audio_file.getvalue()
        settings_str = f"{num_segments}_{max_segment_duration}_{animation_type}_{frames_per_animation}_{base_image_size[0]}x{base_image_size[1]}_{inference_steps}_{video_quality}_{selected_aspect_ratio}_{memory_optimization}_{vram_optimization}"
        cache_key = hashlib.md5((hashlib.md5(audio_bytes).hexdigest() + settings_str).encode()).hexdigest()
        # Process button with better styling
        st.markdown("### π Process Your Audio")
        convert_col, time_col = st.columns([3, 1])
        with convert_col:
            convert_button = st.button("π¬ Convert to Video", type="primary", use_container_width=True)
        with time_col:
            st.info("Processing time: ~1-3 minutes")
        # Check if result is already in cache
        if use_caching and result_cache.exists(cache_key, ".mp4") and convert_button:
            output_video = result_cache.get_cache_path(cache_key, ".mp4")
            st.success("β Found cached result! Loading video...")
            # Display the cached video
            st.markdown("### π₯ Your Video")
            st.video(output_video)
            with open(output_video, "rb") as file:
                st.download_button(
                    label="π₯ Download Video",
                    data=file,
                    file_name=f"audio_to_video_{selected_aspect_ratio.replace(':', '_')}.mp4",
                    mime="video/mp4",
                    use_container_width=True
                )
            # Cached path short-circuits the whole pipeline below.
            return
        if convert_button:
            # Initialize progress tracking with better UI
            progress_container = st.container()
            with progress_container:
                progress_bar = st.progress(0)
                status_text = st.empty()
                # Add a processing animation
                processing_col1, processing_col2 = st.columns([1, 3])
                with processing_col1:
                    st.markdown("### Processing:")
                with processing_col2:
                    status_message = st.empty()
            try:
                # Force garbage collection before starting
                if memory_optimization or vram_optimization:
                    gc.collect()
                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
                # Store VRAM optimization settings to apply after initialization
                apply_vram_optimization = vram_optimization
                # Adjust parameters for VRAM optimization if enabled
                if vram_optimization:
                    # Set lower inference steps when VRAM optimization is enabled
                    if inference_steps > 25:
                        inference_steps = 25
                    # Use smaller base image size when VRAM optimization is enabled
                    if base_image_size[0] > 512 or base_image_size[1] > 512:
                        base_image_size = (512, 512)
                # Step 1: Initialize components
                status_text.text("Initializing components...")
                status_message.markdown("π **Setting up AI models...**")
                transcriber = get_transcriber()
                prompt_generator = get_prompt_generator()
                image_generator = get_image_generator()
                animator = get_animator()
                video_creator = get_video_creator()
                # Set aspect ratio for all components
                image_generator.set_aspect_ratio(selected_aspect_ratio)
                animator.set_aspect_ratio(selected_aspect_ratio)
                video_creator.set_aspect_ratio(selected_aspect_ratio)
                # Apply VRAM optimization if enabled
                if apply_vram_optimization:
                    image_generator.set_vram_optimization(True)
                # Set maximum segment duration
                transcriber.set_max_segment_duration(max_segment_duration)
                video_creator.set_max_segment_duration(max_segment_duration)
                # Set animation frames
                animator.set_frames_per_animation(frames_per_animation)
                # Calculate actual image size based on aspect ratio
                actual_image_size = image_generator.get_size_for_aspect_ratio(base_image_size, selected_aspect_ratio)
                # Update image generator settings
                image_generator.set_inference_steps(inference_steps)
                image_generator.set_target_size(actual_image_size)
                progress_bar.progress(10)
                # Step 2: Segment and transcribe audio
                status_text.text("Segmenting audio...")
                status_message.markdown("π **Analyzing audio...**")
                try:
                    audio_segments, timestamps = transcriber.segment_audio(audio_file, num_segments=num_segments)
                except Exception as e:
                    st.warning(f"Error segmenting audio: {str(e)}. Using simplified segmentation.")
                    # Fallback: Create empty segments
                    # assumes 16 kHz sample rate for the silent filler — TODO confirm against transcriber
                    import numpy as np
                    segment_duration = 4.0  # Default to 4-second segments (within 3-5 second range)
                    audio_segments = [np.zeros(int(16000 * segment_duration)) for _ in range(num_segments)]  # 4-second silent segments
                    total_duration = segment_duration * num_segments
                    timestamps = [(i*segment_duration, (i+1)*segment_duration) for i in range(num_segments)]
                progress_bar.progress(15)
                # Transcribe segments in parallel if enabled
                status_text.text("Transcribing audio segments...")
                status_message.markdown("π€ **Converting speech to text...**")
                if parallel_processing:
                    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                        # Create a partial function with the transcriber
                        process_func = partial(process_audio_segment, transcriber=transcriber)
                        # Process segments in parallel
                        transcriptions = list(executor.map(process_func, audio_segments))
                else:
                    transcriptions = []
                    for segment in audio_segments:
                        try:
                            trans = transcriber.transcribe_segment(segment)
                            transcriptions.append(trans)
                        except Exception as e:
                            st.warning(f"Error transcribing segment: {str(e)}. Using empty transcription.")
                            transcriptions.append("")
                # Force garbage collection after transcription
                if memory_optimization or apply_vram_optimization:
                    gc.collect()
                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
                # Display transcriptions with better styling
                progress_bar.progress(30)
                st.markdown("### π Transcriptions")
                for i, (trans, (start, end)) in enumerate(zip(transcriptions, timestamps)):
                    st.markdown(f"""
                    <div style="background-color: #f0f2f6; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
                    <strong>Segment {i+1} ({start:.1f}s - {end:.1f}s):</strong> {trans}
                    </div>
                    """, unsafe_allow_html=True)
                # Step 3: Generate prompts in parallel
                status_text.text("Generating prompts from transcriptions...")
                status_message.markdown("βοΈ **Creating image descriptions...**")
                if parallel_processing:
                    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                        # Create a partial function with the prompt generator and aspect ratio
                        prompt_func = partial(generate_prompt_for_segment,
                                              prompt_generator=prompt_generator,
                                              aspect_ratio=selected_aspect_ratio)
                        # Generate prompts in parallel
                        prompts = list(executor.map(prompt_func, transcriptions))
                else:
                    prompts = []
                    for trans in transcriptions:
                        try:
                            prompt = prompt_generator.generate_optimized_prompt(trans, selected_aspect_ratio)
                            prompts.append(prompt)
                        except Exception as e:
                            st.warning(f"Error generating prompt: {str(e)}. Using fallback prompt.")
                            prompts.append(f"{trans}, visual scene, detailed, vibrant, cinematic")
                # Display prompts with better styling
                progress_bar.progress(40)
                st.markdown("### ποΈ Generated Prompts")
                for i, prompt in enumerate(prompts):
                    st.markdown(f"""
                    <div style="background-color: #e8f4f8; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
                    <strong>Prompt {i+1}:</strong> {prompt}
                    </div>
                    """, unsafe_allow_html=True)
                # Step 4: Generate images in parallel or batches
                status_text.text("Generating images from prompts...")
                status_message.markdown("π¨ **Creating images...**")
                # For memory optimization, process in smaller batches even with parallel processing
                if memory_optimization or apply_vram_optimization:
                    batch_size = 2  # Process only 2 images at a time to conserve memory
                    images = []
                    for i in range(0, len(prompts), batch_size):
                        batch_prompts = prompts[i:i+batch_size]
                        status_text.text(f"Generating images {i+1}-{min(i+batch_size, len(prompts))}/{len(prompts)}...")
                        if parallel_processing and batch_size > 1:
                            with concurrent.futures.ThreadPoolExecutor(max_workers=min(batch_size, max_workers)) as executor:
                                # Create a partial function with the image generator
                                image_func = partial(generate_image_for_prompt, image_generator=image_generator)
                                # Generate images in parallel within the batch
                                batch_images = list(executor.map(image_func, batch_prompts))
                        else:
                            batch_images = []
                            for prompt in batch_prompts:
                                img_path = generate_image_for_prompt(prompt, image_generator)
                                batch_images.append(img_path)
                        images.extend(batch_images)
                        # Force garbage collection after each batch
                        gc.collect()
                        torch.cuda.empty_cache() if torch.cuda.is_available() else None
                else:
                    # Standard processing without special memory considerations
                    if parallel_processing:
                        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                            # Create a partial function with the image generator
                            image_func = partial(generate_image_for_prompt, image_generator=image_generator)
                            # Generate images in parallel
                            images = list(executor.map(image_func, prompts))
                    else:
                        images = []
                        for i, prompt in enumerate(prompts):
                            status_text.text(f"Generating image {i+1}/{len(prompts)}...")
                            img_path = generate_image_for_prompt(prompt, image_generator)
                            images.append(img_path)
                # Display images with better styling
                progress_bar.progress(60)
                st.markdown("### πΌοΈ Generated Images")
                image_cols = st.columns(min(len(images), 3))
                for i, img_path in enumerate(images):
                    with image_cols[i % len(image_cols)]:
                        try:
                            # Verify image exists and is valid
                            if os.path.exists(img_path):
                                # Try to open and verify the image
                                from PIL import Image
                                try:
                                    img = Image.open(img_path)
                                    # Convert to RGB if needed
                                    if img.mode != "RGB":
                                        img = img.convert("RGB")
                                    # Save as JPEG to ensure compatibility
                                    safe_path = f"temp/safe_image_{int(time.time() * 1000)}_{i}.jpg"
                                    img.save(safe_path, format="JPEG", quality=95)
                                    # Display the safe image
                                    st.image(safe_path, caption=f"Image {i+1}", use_container_width=True)
                                except Exception as e:
                                    st.error(f"Error loading image {i+1}: {str(e)}")
                            else:
                                st.warning(f"Image {i+1} not found")
                        except Exception as e:
                            st.error(f"Error displaying image {i+1}: {str(e)}")
                # Force garbage collection after image generation
                if memory_optimization or apply_vram_optimization:
                    gc.collect()
                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
                # Step 5: Add animations in parallel or batches
                status_text.text("Adding animations to images...")
                status_message.markdown("β¨ **Adding animations...**")
                # For memory optimization, process in smaller batches
                if memory_optimization or apply_vram_optimization:
                    batch_size = 3  # Process only 3 animations at a time
                    animated_frames = []
                    for i in range(0, len(images), batch_size):
                        batch_images = images[i:i+batch_size]
                        status_text.text(f"Animating images {i+1}-{min(i+batch_size, len(images))}/{len(images)}...")
                        if parallel_processing and batch_size > 1:
                            with concurrent.futures.ThreadPoolExecutor(max_workers=min(batch_size, max_workers)) as executor:
                                # Create a partial function with the animator, animation type, and frames
                                animate_func = partial(animate_image,
                                                       animator=animator,
                                                       animation_type=animation_type,
                                                       num_frames=frames_per_animation)
                                # Animate images in parallel within the batch
                                batch_frames = list(executor.map(animate_func, batch_images))
                        else:
                            batch_frames = []
                            for img_path in batch_images:
                                frames = animate_image(img_path, animator, animation_type, frames_per_animation)
                                batch_frames.append(frames)
                        animated_frames.extend(batch_frames)
                        # Force garbage collection after each batch
                        gc.collect()
                        torch.cuda.empty_cache() if torch.cuda.is_available() else None
                else:
                    # Standard processing without special memory considerations
                    if parallel_processing:
                        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                            # Create a partial function with the animator, animation type, and frames
                            animate_func = partial(animate_image,
                                                   animator=animator,
                                                   animation_type=animation_type,
                                                   num_frames=frames_per_animation)
                            # Animate images in parallel
                            animated_frames = list(executor.map(animate_func, images))
                    else:
                        animated_frames = []
                        for i, img_path in enumerate(images):
                            status_text.text(f"Animating image {i+1}/{len(images)}...")
                            frames = animator.animate_single_image(
                                img_path,
                                animation_type,
                                num_frames=frames_per_animation
                            )
                            animated_frames.append(frames)
                progress_bar.progress(80)
                # Force garbage collection before video creation
                if memory_optimization or apply_vram_optimization:
                    gc.collect()
                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
                # Step 6: Create video
                status_text.text("Creating final video...")
                status_message.markdown("π¬ **Assembling video...**")
                output_video = video_creator.create_video_from_frames(
                    animated_frames,
                    audio_file,
                    segments=transcriptions,
                    timestamps=timestamps,
                    parallel=parallel_processing and not (memory_optimization or vram_optimization),  # Disable parallel for memory/VRAM optimization
                    max_workers=max_workers
                )
                # Check if output is an error file
                # (video_creator signals failure by returning a .txt path containing the message)
                if output_video.endswith('.txt'):
                    with open(output_video, 'r') as f:
                        error_message = f.read()
                    st.error(f"Error creating video: {error_message}")
                    st.stop()
                # Optimize video if needed
                if video_quality != "High":
                    status_text.text("Optimizing video for web...")
                    status_message.markdown("βοΈ **Optimizing video...**")
                    output_video = video_creator.optimize_video(
                        output_video,
                        bitrate=bitrate,
                        threads=2 if memory_optimization or apply_vram_optimization else max_workers  # Use fewer threads for optimization
                    )
                # Cache the result if caching is enabled
                if use_caching:
                    import shutil
                    cached_path = result_cache.get_cache_path(cache_key, ".mp4")
                    shutil.copy(output_video, cached_path)
                progress_bar.progress(100)
                status_text.text("Video creation complete!")
                status_message.markdown("β **Done!**")
                # Step 7: Display and provide download link with better styling
                st.markdown("### π₯ Your Video")
                st.video(output_video)
                st.markdown("### π₯ Download")
                with open(output_video, "rb") as file:
                    st.download_button(
                        label="π₯ Download Video",
                        data=file,
                        file_name=f"audio_to_video_{selected_aspect_ratio.replace(':', '_')}.mp4",
                        mime="video/mp4",
                        use_container_width=True
                    )
                # Performance metrics
                st.markdown("### β±οΈ Performance Metrics")
                st.info(f"""
                - Video Format: {aspect_ratio}
                - Max Image Duration: {max_segment_duration} seconds
                - Number of Segments: {len(audio_segments)}
                - Parallel Processing: {'Enabled' if parallel_processing else 'Disabled'}
                - Memory Optimization: {'Enabled' if memory_optimization else 'Disabled'}
                - VRAM Optimization: {'Enabled' if apply_vram_optimization else 'Disabled'}
                - Workers: {max_workers}
                - Image Size: {actual_image_size[0]}x{actual_image_size[1]}
                - Inference Steps: {inference_steps}
                - Video Quality: {video_quality}
                """)
                # Clean up temporary files
                status_text.text("Cleaning up temporary files...")
                for path in images + [p for frames in animated_frames for p in frames]:
                    if os.path.exists(path):
                        try:
                            os.remove(path)
                        except:
                            # Best-effort cleanup; a locked/missing temp file is not fatal.
                            pass
                # Final garbage collection
                if memory_optimization or apply_vram_optimization:
                    gc.collect()
                    torch.cuda.empty_cache() if torch.cuda.is_available() else None
                status_text.text("All done! Your video is ready for download.")
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
                st.exception(e)
                # Provide troubleshooting tips
                st.markdown("### π§ Troubleshooting Tips")
                st.info("""
                - Try enabling memory optimization
                - Use a smaller image size
                - Reduce inference steps
                - Reduce the number of segments
                - Make sure your audio file is in a supported format
                - Clear the cache and try again
                """)
# Script entry point (Streamlit executes this module top to bottom on each rerun).
if __name__ == "__main__":
    main()
|