Spaces:

garyuzair
/

Video-Fx

Running

App Files Files Community

garyuzair committed on Mar 13, 2025

Commit

7323bbb

verified ·

1 Parent(s): c8e5c2a

Upload 7 files

Browse files

Files changed (7) hide show

animator.py +83 -30
app.py +304 -68
image_generator.py +140 -28
prompt_generator.py +49 -18
requirements.txt +1 -1
transcriber.py +40 -9
video_creator.py +74 -37

animator.py CHANGED Viewed

@@ -2,15 +2,21 @@ import streamlit as st
 import os
 import numpy as np
 from PIL import Image
-import tempfile
 import time
 class Animator:
     def __init__(self):
-        pass
     def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
         """Add a simple zoom animation to an image"""
         # Ensure output directory exists
         os.makedirs(output_dir, exist_ok=True)
@@ -34,10 +40,17 @@ class Animator:
             new_img.save(frame_path)
             frames.append(frame_path)
         return frames
     def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
         """Add a simple panning animation to an image"""
         # Ensure output directory exists
         os.makedirs(output_dir, exist_ok=True)
@@ -77,10 +90,17 @@ class Animator:
             new_img.save(frame_path)
             frames.append(frame_path)
         return frames
     def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
         """Add a fade in/out animation to an image"""
         # Ensure output directory exists
         os.makedirs(output_dir, exist_ok=True)
@@ -108,37 +128,70 @@ class Animator:
             new_img.convert("RGB").save(frame_path)
             frames.append(frame_path)
         return frames
-    def animate_images(self, image_paths, animation_type="random", output_dir="temp", progress_callback=None):
-        """Add animations to a list of images"""
-        all_animated_frames = []
         animation_types = ["zoom", "pan_right", "pan_left", "fade_in"]
-        for i, img_path in enumerate(image_paths):
-            if progress_callback:
-                progress_callback(f"Animating image {i+1}/{len(image_paths)}...")
-            # Choose animation type
-            if animation_type == "random":
-                chosen_type = animation_types[i % len(animation_types)]
-            else:
-                chosen_type = animation_type
-            # Apply the chosen animation
-            if chosen_type.startswith("pan"):
-                direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
-                frames = self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
-            elif chosen_type.startswith("fade"):
-                fade_type = chosen_type.split("_")[1] if "_" in chosen_type else "in"
-                frames = self.add_fade_animation(img_path, fade_type=fade_type, output_dir=output_dir)
-            else:  # Default to zoom
-                frames = self.add_zoom_animation(img_path, output_dir=output_dir)
-            all_animated_frames.append(frames)
-            # Small delay to prevent resource exhaustion
-            time.sleep(0.1)
         return all_animated_frames

 import os
 import numpy as np
 from PIL import Image
 import time
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
 class Animator:
     def __init__(self):
+        self.frame_cache = {}
     def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
         """Add a simple zoom animation to an image"""
+        # Check cache first
+        cache_key = f"zoom_{image_path}_{num_frames}_{zoom_factor}"
+        if cache_key in self.frame_cache:
+            return self.frame_cache[cache_key]
         # Ensure output directory exists
         os.makedirs(output_dir, exist_ok=True)
             new_img.save(frame_path)
             frames.append(frame_path)
+        # Cache the result
+        self.frame_cache[cache_key] = frames
         return frames
     def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
         """Add a simple panning animation to an image"""
+        # Check cache first
+        cache_key = f"pan_{image_path}_{num_frames}_{direction}"
+        if cache_key in self.frame_cache:
+            return self.frame_cache[cache_key]
         # Ensure output directory exists
         os.makedirs(output_dir, exist_ok=True)
             new_img.save(frame_path)
             frames.append(frame_path)
+        # Cache the result
+        self.frame_cache[cache_key] = frames
         return frames
     def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
         """Add a fade in/out animation to an image"""
+        # Check cache first
+        cache_key = f"fade_{image_path}_{num_frames}_{fade_type}"
+        if cache_key in self.frame_cache:
+            return self.frame_cache[cache_key]
         # Ensure output directory exists
         os.makedirs(output_dir, exist_ok=True)
             new_img.convert("RGB").save(frame_path)
             frames.append(frame_path)
+        # Cache the result
+        self.frame_cache[cache_key] = frames
         return frames
+    def animate_single_image(self, img_path, animation_type="random", output_dir="temp"):
+        """Animate a single image and return the frames produced for it.
+
+        Args:
+            img_path: Path of the image to animate.
+            animation_type: "random", or an explicit style such as "zoom",
+                "pan_right", "pan_left" or "fade_in". Any value that does not
+                start with "pan" or "fade" falls back to the zoom animation.
+            output_dir: Directory forwarded to the add_*_animation method.
+
+        Returns:
+            Whatever the selected add_*_animation method returns.
+        """
+        # Imported locally so this method does not depend on the file's import block
+        import zlib
+        # Choose animation type
         animation_types = ["zoom", "pan_right", "pan_left", "fade_in"]
+        if animation_type == "random":
+            # Built-in hash() of a str is salted per process, so it picks a different
+            # style on every run; a CRC32 of the path is stable across runs.
+            path_digest = zlib.crc32(str(img_path).encode("utf-8"))
+            chosen_type = animation_types[path_digest % len(animation_types)]
+        else:
+            chosen_type = animation_type
+        # Apply the chosen animation; the text after the first "_" selects the variant
+        if chosen_type.startswith("pan"):
+            direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
+            return self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
+        if chosen_type.startswith("fade"):
+            fade_type = chosen_type.split("_")[1] if "_" in chosen_type else "in"
+            return self.add_fade_animation(img_path, fade_type=fade_type, output_dir=output_dir)
+        # Default to zoom
+        return self.add_zoom_animation(img_path, output_dir=output_dir)
+    def animate_images(self, image_paths, animation_type="random", output_dir="temp",
+                      progress_callback=None, parallel=False, max_workers=4, batch_size=2):
+        """Animate every image in image_paths and return one result per image.
+
+        Args:
+            image_paths: Sequence of image paths (it is sliced and passed to len()).
+            animation_type: Forwarded to animate_single_image for every image.
+            output_dir: Forwarded to animate_single_image for every image.
+            progress_callback: Optional callable that receives status strings.
+            parallel: Use a thread pool when True and more than one image is given.
+            max_workers: Thread pool size for the parallel path.
+            batch_size: Images per progress batch on the sequential path; values
+                below 1 are treated as 1.
+
+        Returns:
+            A list holding one animate_single_image result per input image, in
+            input order.
+        """
+        # range() raises on a zero step, and a negative step would skip every image
+        batch_size = max(1, batch_size)
+        if parallel and len(image_paths) > 1:
+            # Create a partial function with fixed parameters
+            animate_func = partial(self.animate_single_image,
+                                  animation_type=animation_type,
+                                  output_dir=output_dir)
+            if progress_callback:
+                progress_callback("Animating images in parallel...")
+            # Executor.map yields results in the order of image_paths
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                return list(executor.map(animate_func, image_paths))
+        all_animated_frames = []
+        # Hoisted: the batch total does not change while iterating
+        total_batches = (len(image_paths) + batch_size - 1) // batch_size
+        for i in range(0, len(image_paths), batch_size):
+            if progress_callback:
+                progress_callback(f"Animating batch {i//batch_size + 1}/{total_batches}...")
+            for img_path in image_paths[i:i+batch_size]:
+                all_animated_frames.append(
+                    self.animate_single_image(img_path, animation_type, output_dir))
         return all_animated_frames
+    def clear_cache(self):
+        """Forget all cached frame lists by rebinding frame_cache to a new empty dict.
+
+        Only the in-memory mapping is reset; nothing on disk is touched here.
+
+        Returns:
+            True, unconditionally.
+        """
+        self.frame_cache = dict()
+        return True

app.py CHANGED Viewed

@@ -2,6 +2,10 @@ import streamlit as st
 import os
 import tempfile
 import time
 from transcriber import AudioTranscriber
 from prompt_generator import PromptGenerator
@@ -19,17 +23,25 @@ st.set_page_config(
 # Create necessary directories
 os.makedirs("temp", exist_ok=True)
 os.makedirs("outputs", exist_ok=True)
-# App title and description
-st.title("🎬 Audio to Video Converter")
 st.markdown("""
-This app converts your audio into a video by:
-1. Transcribing your audio
-2. Generating prompts from the transcription
-3. Creating images based on those prompts
-4. Adding animations to the images
-5. Synchronizing with the audio
-6. Providing a downloadable video
 """)
 # Initialize components with caching
@@ -53,35 +65,111 @@ def get_animator():
 def get_video_creator():
     return VideoCreator()
 # Main app flow
 def main():
-    # File uploader for audio
-    audio_file = st.file_uploader("Upload your audio file (WAV, MP3, etc.)", type=["wav", "mp3", "ogg"])
-    # Settings sidebar
     with st.sidebar:
-        st.header("Settings")
-        num_segments = st.slider("Number of segments", min_value=2, max_value=10, value=5)
-        animation_type = st.selectbox(
-            "Animation type",
-            ["random", "zoom", "pan_right", "pan_left", "fade_in"]
-        )
         # Advanced settings
-        st.subheader("Advanced Settings")
-        with st.expander("Image Generation"):
             image_size = st.select_slider(
                 "Image Size",
                 options=[(256, 256), (384, 384), (512, 512)],
-                value=(512, 512),
-                format_func=lambda x: f"{x[0]}x{x[1]}"
             )
         with st.expander("Video Settings"):
             video_quality = st.select_slider(
                 "Video Quality",
                 options=["low", "medium", "high"],
-                value="medium"
             )
             # Map quality to bitrate
@@ -91,108 +179,246 @@ def main():
                 "high": "2000k"
             }
             bitrate = bitrate_map[video_quality]
     if audio_file is not None:
-        # Display audio player
         st.audio(audio_file)
-        # Process button
-        if st.button("Convert to Video"):
-            # Initialize progress tracking
-            progress_bar = st.progress(0)
-            status_text = st.empty()
             try:
                 # Step 1: Initialize components
                 status_text.text("Initializing components...")
                 transcriber = get_transcriber()
                 prompt_generator = get_prompt_generator()
                 image_generator = get_image_generator()
                 animator = get_animator()
                 video_creator = get_video_creator()
                 progress_bar.progress(10)
                 # Step 2: Segment and transcribe audio
-                status_text.text("Segmenting and transcribing audio...")
                 audio_segments, timestamps = transcriber.segment_audio(audio_file, num_segments=num_segments)
-                transcriptions = transcriber.transcribe_segments(audio_segments)
-                # Display transcriptions
-                st.subheader("Transcriptions")
-                for i, (trans, (start, end)) in enumerate(zip(transcriptions, timestamps)):
-                    st.write(f"**Segment {i+1} ({start:.1f}s - {end:.1f}s):** {trans}")
                 progress_bar.progress(30)
-                # Step 3: Generate prompts
                 status_text.text("Generating prompts from transcriptions...")
-                prompts = prompt_generator.generate_optimized_prompts(transcriptions)
-                # Display prompts
-                st.subheader("Generated Prompts")
-                for i, prompt in enumerate(prompts):
-                    st.write(f"**Prompt {i+1}:** {prompt}")
                 progress_bar.progress(40)
-                # Step 4: Generate images
                 status_text.text("Generating images from prompts...")
-                images = image_generator.generate_images(
-                    prompts,
-                    progress_callback=status_text.text
-                )
-                # Optimize images
-                status_text.text("Optimizing images...")
-                optimized_images = image_generator.optimize_all_images(images, target_size=image_size)
-                # Display images
-                st.subheader("Generated Images")
-                cols = st.columns(min(len(optimized_images), 3))
-                for i, img_path in enumerate(optimized_images):
-                    cols[i % len(cols)].image(img_path, caption=f"Image {i+1}")
                 progress_bar.progress(60)
-                # Step 5: Add animations
                 status_text.text("Adding animations to images...")
-                animated_frames = animator.animate_images(
-                    optimized_images,
-                    animation_type=animation_type,
-                    progress_callback=status_text.text
-                )
                 progress_bar.progress(80)
                 # Step 6: Create video
                 status_text.text("Creating final video...")
                 output_video = video_creator.create_video_from_frames(
                     animated_frames,
                     audio_file,
                     segments=transcriptions,
-                    timestamps=timestamps
                 )
                 # Optimize video if needed
                 if video_quality != "high":
                     status_text.text("Optimizing video for web...")
                     output_video = video_creator.optimize_video(
                         output_video,
                         target_size=(640, 480) if video_quality == "low" else (854, 480),
-                        bitrate=bitrate
                     )
                 progress_bar.progress(100)
                 status_text.text("Video creation complete!")
-                # Step 7: Display and provide download link
-                st.subheader("Output Video")
                 st.video(output_video)
                 with open(output_video, "rb") as file:
                     st.download_button(
-                        label="Download Video",
                         data=file,
                         file_name="audio_to_video.mp4",
-                        mime="video/mp4"
                     )
                 # Clean up temporary files
                 status_text.text("Cleaning up temporary files...")
                 for path in images + [p for frames in animated_frames for p in frames]:
@@ -207,6 +433,16 @@ def main():
             except Exception as e:
                 st.error(f"An error occurred: {str(e)}")
                 st.exception(e)
 if __name__ == "__main__":
     main()

 import os
 import tempfile
 import time
+import concurrent.futures
+from functools import partial
+import torch
+import hashlib
 from transcriber import AudioTranscriber
 from prompt_generator import PromptGenerator
 # Create necessary directories
 os.makedirs("temp", exist_ok=True)
 os.makedirs("outputs", exist_ok=True)
+os.makedirs("cache", exist_ok=True)
+# App title and description with improved styling
 st.markdown("""
+<div style="text-align: center; background-color: #f0f2f6; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
+    <h1 style="color: #1E88E5;">🎬 Audio to Video Converter</h1>
+    <p style="font-size: 18px;">Transform your audio into engaging videos with AI-powered visuals</p>
+</div>
+""", unsafe_allow_html=True)
+# App description with better formatting
+st.markdown("""
+### How it works:
+1. 🎤 **Upload your audio** - We accept WAV, MP3, and OGG formats
+2. 🔤 **AI transcribes your audio** - Using advanced speech recognition
+3. 🖼️ **Generate images from transcription** - AI creates visuals matching your content
+4. ✨ **Add animations** - Bring images to life with smooth transitions
+5. 🔄 **Synchronize with audio** - Perfectly timed to match your speech
+6. 📥 **Download your video** - Ready to share on social media
 """)
 # Initialize components with caching
 def get_video_creator():
     return VideoCreator()
+# Cache for storing intermediate results
+class ResultCache:
+    """File-backed cache whose file names are the MD5 hex digest of a string key.
+
+    Args:
+        cache_dir: Directory that holds the cache files; created if missing.
+            Defaults to "cache".
+    """
+    def __init__(self, cache_dir="cache"):
+        self.cache_dir = cache_dir
+        os.makedirs(self.cache_dir, exist_ok=True)
+    def get_cache_path(self, key, extension=".pkl"):
+        """Return the path used for key: <cache_dir>/<md5(key)><extension>."""
+        # MD5 only derives a fixed-length, filesystem-safe name from the key
+        digest = hashlib.md5(key.encode()).hexdigest()
+        return os.path.join(self.cache_dir, f"{digest}{extension}")
+    def exists(self, key, extension=".pkl"):
+        """Return True when a cache file for key and extension is present."""
+        return os.path.exists(self.get_cache_path(key, extension))
+    def save(self, key, data, extension=".pkl"):
+        """Pickle data into the cache file for key and return that file's path."""
+        import pickle
+        # The directory may have been removed since construction
+        os.makedirs(self.cache_dir, exist_ok=True)
+        cache_path = self.get_cache_path(key, extension)
+        with open(cache_path, 'wb') as f:
+            pickle.dump(data, f)
+        return cache_path
+    def load(self, key, extension=".pkl"):
+        """Return the unpickled value stored for key, or None when no file exists."""
+        import pickle
+        cache_path = self.get_cache_path(key, extension)
+        # NOTE(security): pickle.load can run arbitrary code from the file, so the
+        # cache directory must only ever contain files written by this app.
+        try:
+            # Open directly instead of checking first: avoids a check-then-open race
+            with open(cache_path, 'rb') as f:
+                return pickle.load(f)
+        except FileNotFoundError:
+            return None
+    def clear(self):
+        """Delete every file and sub-directory inside the cache directory."""
+        import shutil
+        # Recreate a missing directory so clearing never fails and later writes succeed
+        os.makedirs(self.cache_dir, exist_ok=True)
+        for entry in os.listdir(self.cache_dir):
+            entry_path = os.path.join(self.cache_dir, entry)
+            if os.path.isfile(entry_path):
+                os.unlink(entry_path)
+            elif os.path.isdir(entry_path):
+                shutil.rmtree(entry_path)
+# Initialize cache
+result_cache = ResultCache()
+# Parallel processing functions
+def process_audio_segment(segment, transcriber):
+    """Transcribe one audio segment.
+
+    Module-level wrapper around ``transcriber.transcribe_segment`` so the call
+    can be bound with functools.partial and mapped over segments by an executor.
+    """
+    transcription = transcriber.transcribe_segment(segment)
+    return transcription
+def generate_prompt_for_segment(transcription, prompt_generator):
+    """Build the image prompt for one transcription.
+
+    Module-level wrapper around ``prompt_generator.generate_optimized_prompt``
+    so the call can be bound with functools.partial and mapped by an executor.
+    """
+    prompt = prompt_generator.generate_optimized_prompt(transcription)
+    return prompt
+def generate_image_for_prompt(prompt, image_generator):
+    """Generate the image for one prompt.
+
+    Module-level wrapper around ``image_generator.generate_image`` so the call
+    can be bound with functools.partial and mapped by an executor.
+    """
+    generated = image_generator.generate_image(prompt)
+    return generated
+def animate_image(image_path, animator, animation_type="random"):
+    """Animate one image with the given animator.
+
+    Module-level wrapper around ``animator.animate_single_image`` so the call
+    can be bound with functools.partial and mapped by an executor. Only the
+    image path and animation type are forwarded, both positionally.
+    """
+    frames = animator.animate_single_image(image_path, animation_type)
+    return frames
 # Main app flow
 def main():
+    # Settings sidebar with improved UI
     with st.sidebar:
+        st.markdown("## ⚙️ Settings")
+        # Performance settings with better organization
+        st.markdown("### 🚀 Performance")
+        with st.expander("Processing Options", expanded=True):
+            parallel_processing = st.toggle("Enable parallel processing", value=True,
+                                          help="Process multiple tasks simultaneously for faster results")
+            max_workers = st.slider("Max parallel workers", min_value=2, max_value=8, value=4,
+                                   help="Number of simultaneous tasks (higher values may use more memory)")
+            use_caching = st.toggle("Enable result caching", value=True,
+                                  help="Save results to speed up repeated conversions")
+        # Content settings
+        st.markdown("### 🎨 Content")
+        with st.expander("Segmentation", expanded=True):
+            num_segments = st.slider("Number of segments", min_value=2, max_value=10, value=5,
+                                    help="How many scenes to create in your video")
+            animation_type = st.selectbox(
+                "Animation style",
+                ["random", "zoom", "pan_right", "pan_left", "fade_in"],
+                help="Choose how images will animate in your video"
+            )
         # Advanced settings
+        st.markdown("### 🔧 Advanced")
+        with st.expander("Image Settings"):
             image_size = st.select_slider(
                 "Image Size",
                 options=[(256, 256), (384, 384), (512, 512)],
+                value=(384, 384),  # Default to medium size for better performance
+                format_func=lambda x: f"{x[0]}x{x[1]}",
+                help="Larger sizes create higher quality images but take longer"
             )
+            inference_steps = st.slider("Image Quality", min_value=10, max_value=50, value=20,
+                                      help="Higher values create better images but take longer")
         with st.expander("Video Settings"):
             video_quality = st.select_slider(
                 "Video Quality",
                 options=["low", "medium", "high"],
+                value="medium",
+                help="Higher quality creates larger files"
             )
             # Map quality to bitrate
                 "high": "2000k"
             }
             bitrate = bitrate_map[video_quality]
+        # Clear cache button
+        if st.button("🧹 Clear Cache", help="Remove all cached results to free up disk space"):
+            result_cache.clear()
+            st.success("Cache cleared successfully!")
+        # About section
+        st.markdown("---")
+        st.markdown("### 📝 About")
+        st.markdown("""
+        This app uses AI to convert audio to video.
+        Optimized for Hugging Face Spaces with:
+        - Parallel processing
+        - Memory-efficient models
+        - Result caching
+        - Batch processing
+        """)
+    # Main content area
+    # File uploader with better styling
+    st.markdown("### 📁 Upload Your Audio")
+    audio_file = st.file_uploader("Select an audio file (WAV, MP3, OGG)", type=["wav", "mp3", "ogg"])
     if audio_file is not None:
+        # Display audio player with better styling
+        st.markdown("### 🎵 Preview Your Audio")
         st.audio(audio_file)
+        # Generate a cache key based on the audio file and settings
+        audio_bytes = audio_file.getvalue()
+        settings_str = f"{num_segments}_{animation_type}_{image_size}_{inference_steps}_{video_quality}"
+        cache_key = hashlib.md5((hashlib.md5(audio_bytes).hexdigest() + settings_str).encode()).hexdigest()
+        # Process button with better styling
+        st.markdown("### 🔄 Process Your Audio")
+        convert_col, time_col = st.columns([3, 1])
+        with convert_col:
+            convert_button = st.button("🎬 Convert to Video", type="primary", use_container_width=True)
+        with time_col:
+            st.info("Processing time: ~1-3 minutes")
+        # Check if result is already in cache
+        if use_caching and result_cache.exists(cache_key, ".mp4") and convert_button:
+            output_video = result_cache.get_cache_path(cache_key, ".mp4")
+            st.success("✅ Found cached result! Loading video...")
+            # Display the cached video
+            st.markdown("### 🎥 Your Video")
+            st.video(output_video)
+            with open(output_video, "rb") as file:
+                st.download_button(
+                    label="📥 Download Video",
+                    data=file,
+                    file_name="audio_to_video.mp4",
+                    mime="video/mp4",
+                    use_container_width=True
+                )
+            return
+        if convert_button:
+            # Initialize progress tracking with better UI
+            progress_container = st.container()
+            with progress_container:
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+                # Add a processing animation
+                processing_col1, processing_col2 = st.columns([1, 3])
+                with processing_col1:
+                    st.markdown("### Processing:")
+                with processing_col2:
+                    status_message = st.empty()
             try:
                 # Step 1: Initialize components
                 status_text.text("Initializing components...")
+                status_message.markdown("🔄 **Setting up AI models...**")
                 transcriber = get_transcriber()
                 prompt_generator = get_prompt_generator()
                 image_generator = get_image_generator()
                 animator = get_animator()
                 video_creator = get_video_creator()
+                # Update image generator settings
+                image_generator.set_inference_steps(inference_steps)
+                image_generator.set_target_size(image_size)
                 progress_bar.progress(10)
                 # Step 2: Segment and transcribe audio
+                status_text.text("Segmenting audio...")
+                status_message.markdown("🔊 **Analyzing audio...**")
                 audio_segments, timestamps = transcriber.segment_audio(audio_file, num_segments=num_segments)
+                progress_bar.progress(15)
+                # Transcribe segments in parallel if enabled
+                status_text.text("Transcribing audio segments...")
+                status_message.markdown("🎤 **Converting speech to text...**")
+                if parallel_processing:
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                        # Create a partial function with the transcriber
+                        process_func = partial(process_audio_segment, transcriber=transcriber)
+                        # Process segments in parallel
+                        transcriptions = list(executor.map(process_func, audio_segments))
+                else:
+                    transcriptions = [transcriber.transcribe_segment(segment) for segment in audio_segments]
+                # Display transcriptions with better styling
                 progress_bar.progress(30)
+                st.markdown("### 📝 Transcriptions")
+                for i, (trans, (start, end)) in enumerate(zip(transcriptions, timestamps)):
+                    st.markdown(f"""
+                    <div style="background-color: #f0f2f6; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
+                        <strong>Segment {i+1} ({start:.1f}s - {end:.1f}s):</strong> {trans}
+                    </div>
+                    """, unsafe_allow_html=True)
+                # Step 3: Generate prompts in parallel
                 status_text.text("Generating prompts from transcriptions...")
+                status_message.markdown("✍️ **Creating image descriptions...**")
+                if parallel_processing:
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                        # Create a partial function with the prompt generator
+                        prompt_func = partial(generate_prompt_for_segment, prompt_generator=prompt_generator)
+                        # Generate prompts in parallel
+                        prompts = list(executor.map(prompt_func, transcriptions))
+                else:
+                    prompts = [prompt_generator.generate_optimized_prompt(trans) for trans in transcriptions]
+                # Display prompts with better styling
                 progress_bar.progress(40)
+                st.markdown("### 🖋️ Generated Prompts")
+                for i, prompt in enumerate(prompts):
+                    st.markdown(f"""
+                    <div style="background-color: #e8f4f8; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
+                        <strong>Prompt {i+1}:</strong> {prompt}
+                    </div>
+                    """, unsafe_allow_html=True)
+                # Step 4: Generate images in parallel
                 status_text.text("Generating images from prompts...")
+                status_message.markdown("🎨 **Creating images...**")
+                if parallel_processing:
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                        # Create a partial function with the image generator
+                        image_func = partial(generate_image_for_prompt, image_generator=image_generator)
+                        # Generate images in parallel
+                        images = list(executor.map(image_func, prompts))
+                else:
+                    images = []
+                    for i, prompt in enumerate(prompts):
+                        status_text.text(f"Generating image {i+1}/{len(prompts)}...")
+                        images.append(image_generator.generate_image(prompt))
+                # Display images with better styling
                 progress_bar.progress(60)
+                st.markdown("### 🖼️ Generated Images")
+                image_cols = st.columns(min(len(images), 3))
+                for i, img_path in enumerate(images):
+                    with image_cols[i % len(image_cols)]:
+                        st.image(img_path, caption=f"Image {i+1}", use_column_width=True)
+                # Step 5: Add animations in parallel
                 status_text.text("Adding animations to images...")
+                status_message.markdown("✨ **Adding animations...**")
+                if parallel_processing:
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                        # Create a partial function with the animator and animation type
+                        animate_func = partial(animate_image, animator=animator, animation_type=animation_type)
+                        # Animate images in parallel
+                        animated_frames = list(executor.map(animate_func, images))
+                else:
+                    animated_frames = []
+                    for i, img_path in enumerate(images):
+                        status_text.text(f"Animating image {i+1}/{len(images)}...")
+                        animated_frames.append(animator.animate_single_image(img_path, animation_type))
                 progress_bar.progress(80)
                 # Step 6: Create video
                 status_text.text("Creating final video...")
+                status_message.markdown("🎬 **Assembling video...**")
                 output_video = video_creator.create_video_from_frames(
                     animated_frames,
                     audio_file,
                     segments=transcriptions,
+                    timestamps=timestamps,
+                    parallel=parallel_processing,
+                    max_workers=max_workers
                 )
                 # Optimize video if needed
                 if video_quality != "high":
                     status_text.text("Optimizing video for web...")
+                    status_message.markdown("⚙️ **Optimizing video...**")
                     output_video = video_creator.optimize_video(
                         output_video,
                         target_size=(640, 480) if video_quality == "low" else (854, 480),
+                        bitrate=bitrate,
+                        threads=max_workers
                     )
+                # Cache the result if caching is enabled
+                if use_caching:
+                    import shutil
+                    cached_path = result_cache.get_cache_path(cache_key, ".mp4")
+                    shutil.copy(output_video, cached_path)
                 progress_bar.progress(100)
                 status_text.text("Video creation complete!")
+                status_message.markdown("✅ **Done!**")
+                # Step 7: Display and provide download link with better styling
+                st.markdown("### 🎥 Your Video")
                 st.video(output_video)
+                st.markdown("### 📥 Download")
                 with open(output_video, "rb") as file:
                     st.download_button(
+                        label="📥 Download Video",
                         data=file,
                         file_name="audio_to_video.mp4",
+                        mime="video/mp4",
+                        use_container_width=True
                     )
+                # Performance metrics
+                st.markdown("### ⏱️ Performance Metrics")
+                st.info(f"""
+                - Parallel Processing: {'Enabled' if parallel_processing else 'Disabled'}
+                - Workers: {max_workers}
+                - Image Size: {image_size[0]}x{image_size[1]}
+                - Inference Steps: {inference_steps}
+                - Video Quality: {video_quality.capitalize()}
+                """)
                 # Clean up temporary files
                 status_text.text("Cleaning up temporary files...")
                 for path in images + [p for frames in animated_frames for p in frames]:
             except Exception as e:
                 st.error(f"An error occurred: {str(e)}")
                 st.exception(e)
+                # Provide troubleshooting tips
+                st.markdown("### 🔧 Troubleshooting Tips")
+                st.info("""
+                - Try reducing the number of segments
+                - Use a smaller image size
+                - Reduce inference steps
+                - Make sure your audio file is in a supported format
+                - Clear the cache and try again
+                """)
 if __name__ == "__main__":
     main()

image_generator.py CHANGED Viewed

@@ -1,19 +1,25 @@
 import streamlit as st
 import torch
-from diffusers import StableDiffusionPipeline
-from PIL import Image
 import os
 import time
 class ImageGenerator:
     def __init__(self):
         self.model = None
     def load_model(self):
         """Load a lightweight image generation model"""
         if self.model is None:
             with st.spinner("Loading image generation model... This may take a moment."):
                 # Using a lightweight model for image generation
                 model_id = "runwayml/stable-diffusion-v1-5"
                 # Load with memory optimization settings
@@ -31,9 +37,53 @@ class ImageGenerator:
                 if hasattr(self.model, 'enable_attention_slicing'):
                     self.model.enable_attention_slicing()
         return self.model
-    def generate_images(self, prompts, output_dir="temp", progress_callback=None):
         """Generate images from the prompts"""
         # Load the model if not already loaded
         model = self.load_model()
@@ -41,30 +91,34 @@ class ImageGenerator:
         # Ensure output directory exists
         os.makedirs(output_dir, exist_ok=True)
-        images = []
-        for i, prompt in enumerate(prompts):
-            if progress_callback:
-                progress_callback(f"Generating image {i+1}/{len(prompts)}...")
-            # Generate image with minimal inference steps to save resources
-            image = model(
-                prompt,
-                num_inference_steps=20,  # Reduced steps for speed
-                guidance_scale=7.5      # Standard guidance scale
-            ).images[0]
-            # Save the image
-            image_path = f"{output_dir}/image_{i}.png"
-            image.save(image_path)
-            images.append(image_path)
-            # Small delay to prevent resource exhaustion
-            time.sleep(0.5)
         return images
-    def optimize_image(self, image_path, target_size=(512, 512)):
         """Optimize image size for video creation"""
         img = Image.open(image_path)
         # Resize to target size
@@ -75,11 +129,69 @@ class ImageGenerator:
         return image_path
-    def optimize_all_images(self, image_paths, target_size=(512, 512)):
         """Optimize all images for video creation"""
-        optimized_paths = []
-        for path in image_paths:
-            optimized_path = self.optimize_image(path, target_size)
-            optimized_paths.append(optimized_path)
         return optimized_paths

 import streamlit as st
 import torch
 import os
+import numpy as np
+from PIL import Image
 import time
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
 class ImageGenerator:
     def __init__(self):
         self.model = None
+        self.inference_steps = 20
+        self.target_size = (384, 384)
     def load_model(self):
         """Load a lightweight image generation model"""
         if self.model is None:
             with st.spinner("Loading image generation model... This may take a moment."):
                 # Using a lightweight model for image generation
+                from diffusers import StableDiffusionPipeline
                 model_id = "runwayml/stable-diffusion-v1-5"
                 # Load with memory optimization settings
                 if hasattr(self.model, 'enable_attention_slicing'):
                     self.model.enable_attention_slicing()
+                # Enable memory efficient attention
+                if hasattr(self.model, 'enable_vae_slicing'):
+                    self.model.enable_vae_slicing()
+                # Enable xformers memory efficient attention if available
+                try:
+                    if hasattr(self.model, 'enable_xformers_memory_efficient_attention'):
+                        self.model.enable_xformers_memory_efficient_attention()
+                except:
+                    pass
         return self.model
+    def set_inference_steps(self, steps):
+        """Set the number of inference steps"""
+        self.inference_steps = steps
+    def set_target_size(self, size):
+        """Set the target image size"""
+        self.target_size = size
+    def generate_image(self, prompt, output_dir="temp"):
+        """Generate a single image from a prompt"""
+        # Load the model if not already loaded
+        model = self.load_model()
+        # Ensure output directory exists
+        os.makedirs(output_dir, exist_ok=True)
+        # Generate image with minimal inference steps to save resources
+        image = model(
+            prompt,
+            num_inference_steps=self.inference_steps,
+            guidance_scale=7.5
+        ).images[0]
+        # Resize to target size for consistency and performance
+        if image.size != self.target_size:
+            image = image.resize(self.target_size, Image.LANCZOS)
+        # Save the image
+        image_path = f"{output_dir}/image_{int(time.time() * 1000)}.png"
+        image.save(image_path)
+        return image_path
+    def generate_images(self, prompts, output_dir="temp", progress_callback=None, parallel=False, max_workers=4):
         """Generate images from the prompts"""
+        # Returns the saved image paths in the same order as ``prompts``.
+        # progress_callback, when given, receives a status string: once in
+        # parallel mode, once per prompt in sequential mode.
         # Load the model if not already loaded
+        # (the returned pipeline is not used directly below; generate_image()
+        # obtains it again through load_model())
         model = self.load_model()
         # Ensure output directory exists
         os.makedirs(output_dir, exist_ok=True)
+        # The parallel path is only taken when there is more than one prompt
+        if parallel and len(prompts) > 1:
+            # Generate images in parallel
+            # NOTE(review): every worker thread calls generate_image() on the same
+            # self.model object - confirm the pipeline tolerates concurrent calls.
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                # Create a partial function with fixed parameters
+                generate_func = partial(self.generate_image, output_dir=output_dir)
+                # Process prompts in parallel and collect results
+                if progress_callback:
+                    progress_callback("Generating images in parallel...")
+                # executor.map yields results in input order, not completion order
+                images = list(executor.map(generate_func, prompts))
+        else:
+            # Generate images sequentially
+            images = []
+            for i, prompt in enumerate(prompts):
+                if progress_callback:
+                    progress_callback(f"Generating image {i+1}/{len(prompts)}...")
+                image_path = self.generate_image(prompt, output_dir)
+                images.append(image_path)
         return images
+    def optimize_image(self, image_path, target_size=None):
         """Optimize image size for video creation"""
+        if target_size is None:
+            target_size = self.target_size
         img = Image.open(image_path)
         # Resize to target size
         return image_path
+    def optimize_all_images(self, image_paths, target_size=None, parallel=False, max_workers=4):
         """Optimize all images for video creation"""
+        # Returns one entry per input path (whatever optimize_image returns), in input order.
+        # target_size=None means "use the size configured on this instance".
+        if target_size is None:
+            target_size = self.target_size
+        # The parallel path is only taken when there is more than one image
+        if parallel and len(image_paths) > 1:
+            # Optimize images in parallel
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                # Create a partial function with fixed parameters
+                optimize_func = partial(self.optimize_image, target_size=target_size)
+                # Process images in parallel
+                # (executor.map yields results in input order, not completion order)
+                optimized_paths = list(executor.map(optimize_func, image_paths))
+        else:
+            # Optimize images sequentially
+            optimized_paths = []
+            for path in image_paths:
+                optimized_path = self.optimize_image(path, target_size)
+                optimized_paths.append(optimized_path)
         return optimized_paths
+    def batch_generate_images(self, prompts, batch_size=2, output_dir="temp", progress_callback=None):
+        """Generate images in batches to optimize memory usage"""
+        # Load the model if not already loaded
+        model = self.load_model()
+        # Ensure output directory exists
+        os.makedirs(output_dir, exist_ok=True)
+        images = []
+        # Process prompts in batches
+        for i in range(0, len(prompts), batch_size):
+            batch_prompts = prompts[i:i+batch_size]
+            if progress_callback:
+                progress_callback(f"Generating batch {i//batch_size + 1}/{(len(prompts) + batch_size - 1)//batch_size}...")
+            # Generate images for this batch
+            batch_images = []
+            for j, prompt in enumerate(batch_prompts):
+                # Generate image
+                image = model(
+                    prompt,
+                    num_inference_steps=self.inference_steps,
+                    guidance_scale=7.5
+                ).images[0]
+                # Resize to target size
+                if image.size != self.target_size:
+                    image = image.resize(self.target_size, Image.LANCZOS)
+                # Save the image
+                image_path = f"{output_dir}/image_{i+j}_{int(time.time() * 1000)}.png"
+                image.save(image_path)
+                batch_images.append(image_path)
+            # Add batch results to overall results
+            images.extend(batch_images)
+            # Clear CUDA cache if using GPU
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        return images

prompt_generator.py CHANGED Viewed

@@ -5,6 +5,7 @@ from transformers import pipeline
 class PromptGenerator:
     def __init__(self):
         self.model = None
     def load_model(self):
         """Load a lightweight text generation model"""
@@ -14,6 +15,37 @@ class PromptGenerator:
                 self.model = pipeline("text-generation", model="distilgpt2")
         return self.model
     def generate_prompts(self, text, num_segments=5):
         """Generate image prompts from the transcription"""
         # Load the model if not already loaded
@@ -50,25 +82,24 @@ class PromptGenerator:
         return prompts, segments
-    def generate_optimized_prompts(self, transcriptions, timestamps=None):
-        """Generate optimized prompts from transcribed segments"""
         model = self.load_model()
-        prompts = []
-        for i, transcription in enumerate(transcriptions):
-            # Skip empty transcriptions
-            if not transcription.strip():
-                continue
-            # Create a prompt template focused on visual elements
-            template = f"Describe a visual scene for: '{transcription}'"
-            # Generate with minimal tokens to save resources
-            result = model(template, max_length=30, num_return_sequences=1)
-            generated_text = result[0]['generated_text'].replace(template, "").strip()
-            # Create an optimized prompt with style keywords
-            prompt = f"{transcription} {generated_text}, detailed, vibrant, cinematic"
-            prompts.append(prompt)
         return prompts

 class PromptGenerator:
     def __init__(self):
         self.model = None
+        self.prompt_cache = {}
     def load_model(self):
         """Load a lightweight text generation model"""
                 self.model = pipeline("text-generation", model="distilgpt2")
         return self.model
+    def generate_optimized_prompt(self, transcription):
+        """Generate an optimized prompt from a single transcription"""
+        # Check cache first
+        import hashlib
+        cache_key = hashlib.md5(transcription.encode()).hexdigest()
+        if cache_key in self.prompt_cache:
+            return self.prompt_cache[cache_key]
+        # Load the model if not already loaded
+        model = self.load_model()
+        # Skip empty transcriptions
+        if not transcription.strip():
+            return ""
+        # Create a prompt template focused on visual elements
+        template = f"Describe a visual scene for: '{transcription}'"
+        # Generate with minimal tokens to save resources
+        result = model(template, max_length=30, num_return_sequences=1)
+        generated_text = result[0]['generated_text'].replace(template, "").strip()
+        # Create an optimized prompt with style keywords
+        prompt = f"{transcription} {generated_text}, detailed, vibrant, cinematic"
+        # Cache the result
+        self.prompt_cache[cache_key] = prompt
+        return prompt
     def generate_prompts(self, text, num_segments=5):
         """Generate image prompts from the transcription"""
         # Load the model if not already loaded
         return prompts, segments
+    def generate_optimized_prompts(self, transcriptions, parallel=False, max_workers=4):
+        """Generate optimized prompts from transcribed segments with parallel processing.
+        Returns one prompt per transcription, in input order. Threads are used only
+        when ``parallel`` is true and there is more than one transcription.
+        """
+        import concurrent.futures
+        # Load the model
+        # (the returned object is not used directly below; generate_optimized_prompt()
+        # obtains it again through load_model())
         model = self.load_model()
+        if parallel and len(transcriptions) > 1:
+            # Process in parallel
+            # NOTE(review): worker threads share self.model and self.prompt_cache -
+            # confirm the text-generation pipeline tolerates concurrent calls.
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                # executor.map yields results in input order, not completion order
+                prompts = list(executor.map(self.generate_optimized_prompt, transcriptions))
+        else:
+            # Process sequentially
+            prompts = [self.generate_optimized_prompt(trans) for trans in transcriptions]
         return prompts
+    def clear_cache(self):
+        """Clear the prompt cache"""
+        self.prompt_cache = {}
+        return True

requirements.txt CHANGED Viewed

@@ -4,7 +4,7 @@ torch --extra-index-url https://download.pytorch.org/whl/cpu
 torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
 diffusers
 accelerate
-moviepy==1.0.3
 librosa
 soundfile
 numpy

 torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
 diffusers
 accelerate
+# video_creator.py imports moviepy.editor, which MoviePy 2.x removed - stay on 1.x
+moviepy<2.0
 librosa
 soundfile
 numpy

transcriber.py CHANGED Viewed

@@ -1,16 +1,18 @@
 import streamlit as st
-import os
-import tempfile
 import torch
 import librosa
 import numpy as np
-from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 class AudioTranscriber:
     def __init__(self):
         self.model = None
         self.processor = None
         self.pipe = None
     def load_model(self):
         """Load a lightweight transcription model"""
@@ -47,6 +49,14 @@ class AudioTranscriber:
     def transcribe(self, audio_file):
         """Transcribe the audio file using the loaded model"""
         # Load the model if not already loaded
         pipe = self.load_model()
@@ -63,6 +73,9 @@ class AudioTranscriber:
             result = pipe(y)
             transcription = result["text"]
             return transcription
         finally:
             # Clean up temporary file
@@ -109,13 +122,31 @@ class AudioTranscriber:
             if os.path.exists(tmp_path):
                 os.unlink(tmp_path)
-    def transcribe_segments(self, segments):
-        """Transcribe individual audio segments"""
         pipe = self.load_model()
-        transcriptions = []
-        for segment in segments:
-            result = pipe(segment)
-            transcriptions.append(result["text"])
         return transcriptions

 import streamlit as st
 import torch
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 import librosa
 import numpy as np
+import tempfile
+import os
+from concurrent.futures import ThreadPoolExecutor
 class AudioTranscriber:
     def __init__(self):
         self.model = None
         self.processor = None
         self.pipe = None
+        self.transcription_cache = {}
     def load_model(self):
         """Load a lightweight transcription model"""
     def transcribe(self, audio_file):
         """Transcribe the audio file using the loaded model"""
+        # Generate a cache key based on the audio file
+        import hashlib
+        cache_key = hashlib.md5(audio_file.getvalue()).hexdigest()
+        # Check if result is in cache
+        if cache_key in self.transcription_cache:
+            return self.transcription_cache[cache_key]
         # Load the model if not already loaded
         pipe = self.load_model()
             result = pipe(y)
             transcription = result["text"]
+            # Cache the result
+            self.transcription_cache[cache_key] = transcription
             return transcription
         finally:
             # Clean up temporary file
             if os.path.exists(tmp_path):
                 os.unlink(tmp_path)
+    def transcribe_segment(self, segment):
+        """Transcribe a single audio segment"""
+        pipe = self.load_model()
+        result = pipe(segment)
+        return result["text"]
+    def transcribe_segments(self, segments, parallel=False, max_workers=4):
+        """Transcribe individual audio segments with optional parallel processing.
+        Returns one transcription string per segment, in input order. Threads are
+        used only when ``parallel`` is true and there is more than one segment.
+        """
+        # Used directly by the sequential branch; the parallel branch goes through
+        # transcribe_segment(), which calls load_model() itself
         pipe = self.load_model()
+        if parallel and len(segments) > 1:
+            # Process in parallel using ThreadPoolExecutor
+            # NOTE(review): all worker threads call the same pipeline object -
+            # confirm it tolerates concurrent calls.
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                # Process segments in parallel
+                # (executor.map yields results in input order, not completion order)
+                transcriptions = list(executor.map(self.transcribe_segment, segments))
+        else:
+            # Process sequentially
+            transcriptions = []
+            for segment in segments:
+                result = pipe(segment)
+                transcriptions.append(result["text"])
         return transcriptions
+    def clear_cache(self):
+        """Clear the transcription cache"""
+        self.transcription_cache = {}
+        return True

video_creator.py CHANGED Viewed

@@ -3,14 +3,54 @@ import os
 import tempfile
 from moviepy.editor import ImageSequenceClip, AudioFileClip, concatenate_videoclips, TextClip, CompositeVideoClip
 import numpy as np
 class VideoCreator:
     def __init__(self):
         # Ensure output directory exists
         os.makedirs("outputs", exist_ok=True)
-    def create_video_from_frames(self, animated_frames, audio_file, segments=None, timestamps=None, output_dir="outputs"):
-        """Create a video from animated frames synchronized with audio"""
         # Save the uploaded audio to a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             tmp_file.write(audio_file.getvalue())
@@ -32,37 +72,26 @@ class VideoCreator:
             # Create video clips for each animated segment
             video_clips = []
-            for i, frames in enumerate(animated_frames):
-                # Calculate frame duration based on segment duration
-                segment_duration = segment_durations[min(i, len(segment_durations)-1)]
-                frame_duration = segment_duration / len(frames)
-                # Create a clip from the frames
-                segment_clip = ImageSequenceClip(frames, durations=[frame_duration] * len(frames))
-                # Add text overlay if segments are provided
-                if segments and i < len(segments):
-                    segment_text = segments[i]
-                    # Create a simple text overlay using a workaround since TextClip might be resource-intensive
-                    # This is a simplified approach - in production, you'd use TextClip properly
-                    try:
-                        txt_clip = TextClip(
-                            segment_text,
-                            fontsize=24,
-                            color='white',
-                            bg_color='rgba(0,0,0,0.5)',
-                            size=(segment_clip.w, None),
-                            method='caption'
-                        ).set_duration(segment_clip.duration)
-                        txt_clip = txt_clip.set_position(('center', 'bottom'))
-                        segment_clip = CompositeVideoClip([segment_clip, txt_clip])
-                    except Exception as e:
-                        # If TextClip fails, continue without text overlay
-                        st.warning(f"Could not add text overlay: {e}")
-                video_clips.append(segment_clip)
             # Concatenate all clips
             final_clip = concatenate_videoclips(video_clips)
@@ -71,7 +100,7 @@ class VideoCreator:
             final_clip = final_clip.set_audio(audio_clip)
             # Write the result to a file
-            output_path = f"{output_dir}/output_video.mp4"
             # Use lower resolution and bitrate for faster processing
             final_clip.write_videofile(
@@ -80,10 +109,13 @@ class VideoCreator:
                 codec='libx264',
                 audio_codec='aac',
                 preset='ultrafast',  # Faster encoding
-                threads=2,           # Limit threads to save resources
                 bitrate='1000k'      # Lower bitrate
             )
             return output_path
         finally:
@@ -91,7 +123,7 @@ class VideoCreator:
             if os.path.exists(audio_path):
                 os.unlink(audio_path)
-    def optimize_video(self, video_path, target_size=(640, 480), bitrate='1000k'):
         """Optimize video size and quality for web delivery"""
         from moviepy.editor import VideoFileClip
@@ -102,13 +134,13 @@ class VideoCreator:
         clip_resized = clip.resize(target_size)
         # Save optimized video
-        optimized_path = video_path.replace('.mp4', '_optimized.mp4')
         clip_resized.write_videofile(
             optimized_path,
             codec='libx264',
             audio_codec='aac',
             preset='ultrafast',
-            threads=2,
             bitrate=bitrate
         )
@@ -117,3 +149,8 @@ class VideoCreator:
         clip_resized.close()
         return optimized_path

 import tempfile
 from moviepy.editor import ImageSequenceClip, AudioFileClip, concatenate_videoclips, TextClip, CompositeVideoClip
 import numpy as np
+from concurrent.futures import ThreadPoolExecutor
+import time
 class VideoCreator:
     def __init__(self):
         # Ensure output directory exists
         os.makedirs("outputs", exist_ok=True)
+        self.video_cache = {}
+    def create_segment_clip(self, frames, segment_duration, segment_text=None):
+        """Create a video clip from frames with optional text overlay"""
+        # Calculate frame duration based on segment duration
+        frame_duration = segment_duration / len(frames)
+        # Create a clip from the frames
+        segment_clip = ImageSequenceClip(frames, durations=[frame_duration] * len(frames))
+        # Add text overlay if segment text is provided
+        if segment_text:
+            try:
+                txt_clip = TextClip(
+                    segment_text,
+                    fontsize=24,
+                    color='white',
+                    bg_color='rgba(0,0,0,0.5)',
+                    size=(segment_clip.w, None),
+                    method='caption'
+                ).set_duration(segment_clip.duration)
+                txt_clip = txt_clip.set_position(('center', 'bottom'))
+                segment_clip = CompositeVideoClip([segment_clip, txt_clip])
+            except Exception as e:
+                # If TextClip fails, continue without text overlay
+                st.warning(f"Could not add text overlay: {e}")
+        return segment_clip
+    def create_video_from_frames(self, animated_frames, audio_file, segments=None, timestamps=None,
+                                output_dir="outputs", parallel=False, max_workers=4):
+        """Create a video from animated frames synchronized with audio using parallel processing"""
+        # Generate a cache key based on inputs
+        import hashlib
+        cache_key = f"{hashlib.md5(audio_file.getvalue()).hexdigest()}_{len(animated_frames)}"
+        # Check if result is in cache
+        if cache_key in self.video_cache:
+            return self.video_cache[cache_key]
         # Save the uploaded audio to a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             tmp_file.write(audio_file.getvalue())
             # Create video clips for each animated segment
             video_clips = []
+            if parallel and len(animated_frames) > 1:
+                # Process segments in parallel
+                with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                    # Prepare arguments for parallel processing
+                    args = []
+                    for i, frames in enumerate(animated_frames):
+                        segment_duration = segment_durations[min(i, len(segment_durations)-1)]
+                        segment_text = segments[i] if segments and i < len(segments) else None
+                        args.append((frames, segment_duration, segment_text))
+                    # Process in parallel
+                    video_clips = list(executor.map(lambda x: self.create_segment_clip(*x), args))
+            else:
+                # Process segments sequentially
+                for i, frames in enumerate(animated_frames):
+                    segment_duration = segment_durations[min(i, len(segment_durations)-1)]
+                    segment_text = segments[i] if segments and i < len(segments) else None
+                    segment_clip = self.create_segment_clip(frames, segment_duration, segment_text)
+                    video_clips.append(segment_clip)
             # Concatenate all clips
             final_clip = concatenate_videoclips(video_clips)
             final_clip = final_clip.set_audio(audio_clip)
             # Write the result to a file
+            output_path = f"{output_dir}/output_video_{int(time.time())}.mp4"
             # Use lower resolution and bitrate for faster processing
             final_clip.write_videofile(
                 codec='libx264',
                 audio_codec='aac',
                 preset='ultrafast',  # Faster encoding
+                threads=max_workers,  # Use multiple threads for encoding
                 bitrate='1000k'      # Lower bitrate
             )
+            # Cache the result
+            self.video_cache[cache_key] = output_path
             return output_path
         finally:
             if os.path.exists(audio_path):
                 os.unlink(audio_path)
+    def optimize_video(self, video_path, target_size=(640, 480), bitrate='1000k', threads=2):
         """Optimize video size and quality for web delivery"""
         from moviepy.editor import VideoFileClip
         clip_resized = clip.resize(target_size)
         # Save optimized video
+        optimized_path = video_path.replace('.mp4', f'_optimized_{int(time.time())}.mp4')
         clip_resized.write_videofile(
             optimized_path,
             codec='libx264',
             audio_codec='aac',
             preset='ultrafast',
+            threads=threads,
             bitrate=bitrate
         )
         clip_resized.close()
         return optimized_path
+    def clear_cache(self):
+        """Clear the video cache"""
+        self.video_cache = {}
+        return True