garyuzair committed on
Commit
7323bbb
·
verified ·
1 Parent(s): c8e5c2a

Upload 7 files

Browse files
Files changed (7) hide show
  1. animator.py +83 -30
  2. app.py +304 -68
  3. image_generator.py +140 -28
  4. prompt_generator.py +49 -18
  5. requirements.txt +1 -1
  6. transcriber.py +40 -9
  7. video_creator.py +74 -37
animator.py CHANGED
@@ -2,15 +2,21 @@ import streamlit as st
2
  import os
3
  import numpy as np
4
  from PIL import Image
5
- import tempfile
6
  import time
 
 
7
 
8
  class Animator:
9
  def __init__(self):
10
- pass
11
 
12
  def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
13
  """Add a simple zoom animation to an image"""
 
 
 
 
 
14
  # Ensure output directory exists
15
  os.makedirs(output_dir, exist_ok=True)
16
 
@@ -34,10 +40,17 @@ class Animator:
34
  new_img.save(frame_path)
35
  frames.append(frame_path)
36
 
 
 
37
  return frames
38
 
39
  def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
40
  """Add a simple panning animation to an image"""
 
 
 
 
 
41
  # Ensure output directory exists
42
  os.makedirs(output_dir, exist_ok=True)
43
 
@@ -77,10 +90,17 @@ class Animator:
77
  new_img.save(frame_path)
78
  frames.append(frame_path)
79
 
 
 
80
  return frames
81
 
82
  def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
83
  """Add a fade in/out animation to an image"""
 
 
 
 
 
84
  # Ensure output directory exists
85
  os.makedirs(output_dir, exist_ok=True)
86
 
@@ -108,37 +128,70 @@ class Animator:
108
  new_img.convert("RGB").save(frame_path)
109
  frames.append(frame_path)
110
 
 
 
111
  return frames
112
 
113
- def animate_images(self, image_paths, animation_type="random", output_dir="temp", progress_callback=None):
114
- """Add animations to a list of images"""
115
- all_animated_frames = []
116
-
117
  animation_types = ["zoom", "pan_right", "pan_left", "fade_in"]
118
 
119
- for i, img_path in enumerate(image_paths):
120
- if progress_callback:
121
- progress_callback(f"Animating image {i+1}/{len(image_paths)}...")
122
-
123
- # Choose animation type
124
- if animation_type == "random":
125
- chosen_type = animation_types[i % len(animation_types)]
126
- else:
127
- chosen_type = animation_type
128
-
129
- # Apply the chosen animation
130
- if chosen_type.startswith("pan"):
131
- direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
132
- frames = self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
133
- elif chosen_type.startswith("fade"):
134
- fade_type = chosen_type.split("_")[1] if "_" in chosen_type else "in"
135
- frames = self.add_fade_animation(img_path, fade_type=fade_type, output_dir=output_dir)
136
- else: # Default to zoom
137
- frames = self.add_zoom_animation(img_path, output_dir=output_dir)
138
-
139
- all_animated_frames.append(frames)
140
-
141
- # Small delay to prevent resource exhaustion
142
- time.sleep(0.1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  return all_animated_frames
 
 
 
 
 
 
2
  import os
3
  import numpy as np
4
  from PIL import Image
 
5
  import time
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from functools import partial
8
 
9
  class Animator:
10
  def __init__(self):
11
+ self.frame_cache = {}
12
 
13
  def add_zoom_animation(self, image_path, num_frames=10, zoom_factor=1.05, output_dir="temp"):
14
  """Add a simple zoom animation to an image"""
15
+ # Check cache first
16
+ cache_key = f"zoom_{image_path}_{num_frames}_{zoom_factor}"
17
+ if cache_key in self.frame_cache:
18
+ return self.frame_cache[cache_key]
19
+
20
  # Ensure output directory exists
21
  os.makedirs(output_dir, exist_ok=True)
22
 
 
40
  new_img.save(frame_path)
41
  frames.append(frame_path)
42
 
43
+ # Cache the result
44
+ self.frame_cache[cache_key] = frames
45
  return frames
46
 
47
  def add_pan_animation(self, image_path, num_frames=10, direction="right", output_dir="temp"):
48
  """Add a simple panning animation to an image"""
49
+ # Check cache first
50
+ cache_key = f"pan_{image_path}_{num_frames}_{direction}"
51
+ if cache_key in self.frame_cache:
52
+ return self.frame_cache[cache_key]
53
+
54
  # Ensure output directory exists
55
  os.makedirs(output_dir, exist_ok=True)
56
 
 
90
  new_img.save(frame_path)
91
  frames.append(frame_path)
92
 
93
+ # Cache the result
94
+ self.frame_cache[cache_key] = frames
95
  return frames
96
 
97
  def add_fade_animation(self, image_path, num_frames=10, fade_type="in", output_dir="temp"):
98
  """Add a fade in/out animation to an image"""
99
+ # Check cache first
100
+ cache_key = f"fade_{image_path}_{num_frames}_{fade_type}"
101
+ if cache_key in self.frame_cache:
102
+ return self.frame_cache[cache_key]
103
+
104
  # Ensure output directory exists
105
  os.makedirs(output_dir, exist_ok=True)
106
 
 
128
  new_img.convert("RGB").save(frame_path)
129
  frames.append(frame_path)
130
 
131
+ # Cache the result
132
+ self.frame_cache[cache_key] = frames
133
  return frames
134
 
135
+ def animate_single_image(self, img_path, animation_type="random", output_dir="temp"):
136
+ """Animate a single image"""
137
+ # Choose animation type
 
138
  animation_types = ["zoom", "pan_right", "pan_left", "fade_in"]
139
 
140
+ if animation_type == "random":
141
+ # Use hash of image path to deterministically select animation type
142
+ chosen_type = animation_types[hash(img_path) % len(animation_types)]
143
+ else:
144
+ chosen_type = animation_type
145
+
146
+ # Apply the chosen animation
147
+ if chosen_type.startswith("pan"):
148
+ direction = chosen_type.split("_")[1] if "_" in chosen_type else "right"
149
+ frames = self.add_pan_animation(img_path, direction=direction, output_dir=output_dir)
150
+ elif chosen_type.startswith("fade"):
151
+ fade_type = chosen_type.split("_")[1] if "_" in chosen_type else "in"
152
+ frames = self.add_fade_animation(img_path, fade_type=fade_type, output_dir=output_dir)
153
+ else: # Default to zoom
154
+ frames = self.add_zoom_animation(img_path, output_dir=output_dir)
155
+
156
+ return frames
157
+
158
+ def animate_images(self, image_paths, animation_type="random", output_dir="temp",
159
+ progress_callback=None, parallel=False, max_workers=4, batch_size=2):
160
+ """Add animations to a list of images with parallel processing and batching"""
161
+ all_animated_frames = []
162
+
163
+ if parallel and len(image_paths) > 1:
164
+ # Process in parallel using ThreadPoolExecutor
165
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
166
+ # Create a partial function with fixed parameters
167
+ animate_func = partial(self.animate_single_image,
168
+ animation_type=animation_type,
169
+ output_dir=output_dir)
170
+
171
+ # Process images in parallel
172
+ if progress_callback:
173
+ progress_callback("Animating images in parallel...")
174
+
175
+ # Map and collect results
176
+ all_animated_frames = list(executor.map(animate_func, image_paths))
177
+ else:
178
+ # Process in batches
179
+ for i in range(0, len(image_paths), batch_size):
180
+ batch = image_paths[i:i+batch_size]
181
+
182
+ if progress_callback:
183
+ progress_callback(f"Animating batch {i//batch_size + 1}/{(len(image_paths) + batch_size - 1)//batch_size}...")
184
+
185
+ batch_frames = []
186
+ for img_path in batch:
187
+ frames = self.animate_single_image(img_path, animation_type, output_dir)
188
+ batch_frames.append(frames)
189
+
190
+ all_animated_frames.extend(batch_frames)
191
 
192
  return all_animated_frames
193
+
194
+ def clear_cache(self):
195
+ """Clear the animation frame cache"""
196
+ self.frame_cache = {}
197
+ return True
app.py CHANGED
@@ -2,6 +2,10 @@ import streamlit as st
2
  import os
3
  import tempfile
4
  import time
 
 
 
 
5
 
6
  from transcriber import AudioTranscriber
7
  from prompt_generator import PromptGenerator
@@ -19,17 +23,25 @@ st.set_page_config(
19
  # Create necessary directories
20
  os.makedirs("temp", exist_ok=True)
21
  os.makedirs("outputs", exist_ok=True)
 
22
 
23
- # App title and description
24
- st.title("🎬 Audio to Video Converter")
25
  st.markdown("""
26
- This app converts your audio into a video by:
27
- 1. Transcribing your audio
28
- 2. Generating prompts from the transcription
29
- 3. Creating images based on those prompts
30
- 4. Adding animations to the images
31
- 5. Synchronizing with the audio
32
- 6. Providing a downloadable video
 
 
 
 
 
 
 
 
33
  """)
34
 
35
  # Initialize components with caching
@@ -53,35 +65,111 @@ def get_animator():
53
  def get_video_creator():
54
  return VideoCreator()
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # Main app flow
57
  def main():
58
- # File uploader for audio
59
- audio_file = st.file_uploader("Upload your audio file (WAV, MP3, etc.)", type=["wav", "mp3", "ogg"])
60
-
61
- # Settings sidebar
62
  with st.sidebar:
63
- st.header("Settings")
64
- num_segments = st.slider("Number of segments", min_value=2, max_value=10, value=5)
65
- animation_type = st.selectbox(
66
- "Animation type",
67
- ["random", "zoom", "pan_right", "pan_left", "fade_in"]
68
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  # Advanced settings
71
- st.subheader("Advanced Settings")
72
- with st.expander("Image Generation"):
73
  image_size = st.select_slider(
74
  "Image Size",
75
  options=[(256, 256), (384, 384), (512, 512)],
76
- value=(512, 512),
77
- format_func=lambda x: f"{x[0]}x{x[1]}"
 
78
  )
 
 
79
 
80
  with st.expander("Video Settings"):
81
  video_quality = st.select_slider(
82
  "Video Quality",
83
  options=["low", "medium", "high"],
84
- value="medium"
 
85
  )
86
 
87
  # Map quality to bitrate
@@ -91,108 +179,246 @@ def main():
91
  "high": "2000k"
92
  }
93
  bitrate = bitrate_map[video_quality]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  if audio_file is not None:
96
- # Display audio player
 
97
  st.audio(audio_file)
98
 
99
- # Process button
100
- if st.button("Convert to Video"):
101
- # Initialize progress tracking
102
- progress_bar = st.progress(0)
103
- status_text = st.empty()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  try:
106
  # Step 1: Initialize components
107
  status_text.text("Initializing components...")
 
108
  transcriber = get_transcriber()
109
  prompt_generator = get_prompt_generator()
110
  image_generator = get_image_generator()
111
  animator = get_animator()
112
  video_creator = get_video_creator()
 
 
 
 
 
113
  progress_bar.progress(10)
114
 
115
  # Step 2: Segment and transcribe audio
116
- status_text.text("Segmenting and transcribing audio...")
 
117
  audio_segments, timestamps = transcriber.segment_audio(audio_file, num_segments=num_segments)
118
- transcriptions = transcriber.transcribe_segments(audio_segments)
119
 
120
- # Display transcriptions
121
- st.subheader("Transcriptions")
122
- for i, (trans, (start, end)) in enumerate(zip(transcriptions, timestamps)):
123
- st.write(f"**Segment {i+1} ({start:.1f}s - {end:.1f}s):** {trans}")
 
 
 
 
 
 
 
 
 
124
  progress_bar.progress(30)
 
 
 
 
 
 
 
125
 
126
- # Step 3: Generate prompts
127
  status_text.text("Generating prompts from transcriptions...")
128
- prompts = prompt_generator.generate_optimized_prompts(transcriptions)
 
 
 
 
 
 
 
 
129
 
130
- # Display prompts
131
- st.subheader("Generated Prompts")
132
- for i, prompt in enumerate(prompts):
133
- st.write(f"**Prompt {i+1}:** {prompt}")
134
  progress_bar.progress(40)
 
 
 
 
 
 
 
135
 
136
- # Step 4: Generate images
137
  status_text.text("Generating images from prompts...")
138
- images = image_generator.generate_images(
139
- prompts,
140
- progress_callback=status_text.text
141
- )
142
-
143
- # Optimize images
144
- status_text.text("Optimizing images...")
145
- optimized_images = image_generator.optimize_all_images(images, target_size=image_size)
 
 
 
 
146
 
147
- # Display images
148
- st.subheader("Generated Images")
149
- cols = st.columns(min(len(optimized_images), 3))
150
- for i, img_path in enumerate(optimized_images):
151
- cols[i % len(cols)].image(img_path, caption=f"Image {i+1}")
152
  progress_bar.progress(60)
 
 
 
 
 
153
 
154
- # Step 5: Add animations
155
  status_text.text("Adding animations to images...")
156
- animated_frames = animator.animate_images(
157
- optimized_images,
158
- animation_type=animation_type,
159
- progress_callback=status_text.text
160
- )
 
 
 
 
 
 
 
 
161
  progress_bar.progress(80)
162
 
163
  # Step 6: Create video
164
  status_text.text("Creating final video...")
 
165
  output_video = video_creator.create_video_from_frames(
166
  animated_frames,
167
  audio_file,
168
  segments=transcriptions,
169
- timestamps=timestamps
 
 
170
  )
171
 
172
  # Optimize video if needed
173
  if video_quality != "high":
174
  status_text.text("Optimizing video for web...")
 
175
  output_video = video_creator.optimize_video(
176
  output_video,
177
  target_size=(640, 480) if video_quality == "low" else (854, 480),
178
- bitrate=bitrate
 
179
  )
180
 
 
 
 
 
 
 
181
  progress_bar.progress(100)
182
  status_text.text("Video creation complete!")
 
183
 
184
- # Step 7: Display and provide download link
185
- st.subheader("Output Video")
186
  st.video(output_video)
187
 
 
188
  with open(output_video, "rb") as file:
189
  st.download_button(
190
- label="Download Video",
191
  data=file,
192
  file_name="audio_to_video.mp4",
193
- mime="video/mp4"
 
194
  )
195
 
 
 
 
 
 
 
 
 
 
 
196
  # Clean up temporary files
197
  status_text.text("Cleaning up temporary files...")
198
  for path in images + [p for frames in animated_frames for p in frames]:
@@ -207,6 +433,16 @@ def main():
207
  except Exception as e:
208
  st.error(f"An error occurred: {str(e)}")
209
  st.exception(e)
 
 
 
 
 
 
 
 
 
 
210
 
211
  if __name__ == "__main__":
212
  main()
 
2
  import os
3
  import tempfile
4
  import time
5
+ import concurrent.futures
6
+ from functools import partial
7
+ import torch
8
+ import hashlib
9
 
10
  from transcriber import AudioTranscriber
11
  from prompt_generator import PromptGenerator
 
23
  # Create necessary directories
24
  os.makedirs("temp", exist_ok=True)
25
  os.makedirs("outputs", exist_ok=True)
26
+ os.makedirs("cache", exist_ok=True)
27
 
28
+ # App title and description with improved styling
 
29
  st.markdown("""
30
+ <div style="text-align: center; background-color: #f0f2f6; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
31
+ <h1 style="color: #1E88E5;">🎬 Audio to Video Converter</h1>
32
+ <p style="font-size: 18px;">Transform your audio into engaging videos with AI-powered visuals</p>
33
+ </div>
34
+ """, unsafe_allow_html=True)
35
+
36
+ # App description with better formatting
37
+ st.markdown("""
38
+ ### How it works:
39
+ 1. 🎤 **Upload your audio** - We accept WAV, MP3, and OGG formats
40
+ 2. 🔤 **AI transcribes your audio** - Using advanced speech recognition
41
+ 3. 🖼️ **Generate images from transcription** - AI creates visuals matching your content
42
+ 4. ✨ **Add animations** - Bring images to life with smooth transitions
43
+ 5. 🔄 **Synchronize with audio** - Perfectly timed to match your speech
44
+ 6. 📥 **Download your video** - Ready to share on social media
45
  """)
46
 
47
  # Initialize components with caching
 
65
  def get_video_creator():
66
  return VideoCreator()
67
 
68
+ # Cache for storing intermediate results
69
class ResultCache:
    """File-backed cache for intermediate results.

    Every entry is a single file inside ``cache_dir`` whose name is the hex
    MD5 digest of the key followed by an extension.
    """

    def __init__(self):
        # Directory that holds all cache entries; created on first use.
        self.cache_dir = "cache"
        os.makedirs(self.cache_dir, exist_ok=True)

    def get_cache_path(self, key, extension=".pkl"):
        """Return the file path used for ``key`` (a str) with ``extension``."""
        # Hash the key so arbitrary strings map to a safe, fixed-length name.
        digest = hashlib.md5(key.encode()).hexdigest()
        return os.path.join(self.cache_dir, f"{digest}{extension}")

    def exists(self, key, extension=".pkl"):
        """Return True when a cache file for ``key``/``extension`` is present."""
        return os.path.exists(self.get_cache_path(key, extension))

    def save(self, key, data, extension=".pkl"):
        """Pickle ``data`` under ``key`` and return the path of the cache file.

        The payload is written to a temporary file in the cache directory and
        then moved into place, so a failed or interrupted dump never leaves a
        truncated entry that ``exists`` would report as a hit.

        Raises:
            Whatever ``pickle.dump`` or the file system raises; the temporary
            file is removed before the exception propagates.
        """
        import pickle
        import tempfile

        cache_path = self.get_cache_path(key, extension)
        # The directory may have been removed since __init__ ran.
        os.makedirs(self.cache_dir, exist_ok=True)

        fd, tmp_path = tempfile.mkstemp(dir=self.cache_dir, suffix=".tmp")
        try:
            with os.fdopen(fd, "wb") as f:
                pickle.dump(data, f)
            # os.replace swaps the finished file in as a single step.
            os.replace(tmp_path, cache_path)
        except Exception:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
            raise
        return cache_path

    def load(self, key, extension=".pkl"):
        """Return the object cached under ``key``, or None on a cache miss.

        A missing, empty, or undecodable cache file counts as a miss.
        """
        import pickle

        try:
            with open(self.get_cache_path(key, extension), "rb") as f:
                # NOTE(security): unpickling can execute arbitrary code; this
                # is only acceptable while the cache directory is written
                # exclusively by this application.
                return pickle.load(f)
        except FileNotFoundError:
            return None
        except (pickle.UnpicklingError, EOFError):
            # Corrupt or truncated entry: behave as if nothing was cached.
            return None

    def clear(self):
        """Delete every entry in the cache directory, keeping the directory."""
        import shutil

        # Recreate the directory if it vanished so clearing never fails on it.
        os.makedirs(self.cache_dir, exist_ok=True)
        with os.scandir(self.cache_dir) as entries:
            for entry in list(entries):
                # Only real directories are removed recursively; files and
                # symlinks (even ones pointing at directories) are unlinked.
                if entry.is_dir(follow_symlinks=False):
                    shutil.rmtree(entry.path)
                else:
                    os.unlink(entry.path)
106
+
107
+ # Initialize cache
108
+ result_cache = ResultCache()
109
+
110
+ # Parallel processing functions
111
def process_audio_segment(segment, transcriber):
    """Transcribe one audio segment.

    Kept as a module-level function so it can be mapped over a worker pool.

    Args:
        segment: The audio segment handed to the transcriber.
        transcriber: Object exposing ``transcribe_segment(segment)``.

    Returns:
        Whatever ``transcriber.transcribe_segment`` returns for the segment.
    """
    transcription = transcriber.transcribe_segment(segment)
    return transcription
114
+
115
def generate_prompt_for_segment(transcription, prompt_generator):
    """Build the image prompt for one transcription.

    Kept as a module-level function so it can be mapped over a worker pool.

    Args:
        transcription: Text handed to the prompt generator.
        prompt_generator: Object exposing ``generate_optimized_prompt(text)``.

    Returns:
        Whatever ``prompt_generator.generate_optimized_prompt`` returns.
    """
    prompt = prompt_generator.generate_optimized_prompt(transcription)
    return prompt
118
+
119
def generate_image_for_prompt(prompt, image_generator):
    """Generate the image for one prompt.

    Kept as a module-level function so it can be mapped over a worker pool.

    Args:
        prompt: Prompt handed to the image generator.
        image_generator: Object exposing ``generate_image(prompt)``.

    Returns:
        Whatever ``image_generator.generate_image`` returns for the prompt.
    """
    image = image_generator.generate_image(prompt)
    return image
122
+
123
def animate_image(image_path, animator, animation_type="random"):
    """Animate one image with the given animator.

    Kept as a module-level function so it can be mapped over a worker pool.

    Args:
        image_path: Path of the image to animate.
        animator: Object exposing ``animate_single_image(path, animation_type)``.
        animation_type: Animation style passed through to the animator.

    Returns:
        Whatever ``animator.animate_single_image`` returns for the image.
    """
    frames = animator.animate_single_image(image_path, animation_type)
    return frames
126
+
127
  # Main app flow
128
  def main():
129
+ # Settings sidebar with improved UI
 
 
 
130
  with st.sidebar:
131
+ st.markdown("## ⚙️ Settings")
132
+
133
+ # Performance settings with better organization
134
+ st.markdown("### 🚀 Performance")
135
+ with st.expander("Processing Options", expanded=True):
136
+ parallel_processing = st.toggle("Enable parallel processing", value=True,
137
+ help="Process multiple tasks simultaneously for faster results")
138
+ max_workers = st.slider("Max parallel workers", min_value=2, max_value=8, value=4,
139
+ help="Number of simultaneous tasks (higher values may use more memory)")
140
+ use_caching = st.toggle("Enable result caching", value=True,
141
+ help="Save results to speed up repeated conversions")
142
+
143
+ # Content settings
144
+ st.markdown("### 🎨 Content")
145
+ with st.expander("Segmentation", expanded=True):
146
+ num_segments = st.slider("Number of segments", min_value=2, max_value=10, value=5,
147
+ help="How many scenes to create in your video")
148
+ animation_type = st.selectbox(
149
+ "Animation style",
150
+ ["random", "zoom", "pan_right", "pan_left", "fade_in"],
151
+ help="Choose how images will animate in your video"
152
+ )
153
 
154
  # Advanced settings
155
+ st.markdown("### 🔧 Advanced")
156
+ with st.expander("Image Settings"):
157
  image_size = st.select_slider(
158
  "Image Size",
159
  options=[(256, 256), (384, 384), (512, 512)],
160
+ value=(384, 384), # Default to medium size for better performance
161
+ format_func=lambda x: f"{x[0]}x{x[1]}",
162
+ help="Larger sizes create higher quality images but take longer"
163
  )
164
+ inference_steps = st.slider("Image Quality", min_value=10, max_value=50, value=20,
165
+ help="Higher values create better images but take longer")
166
 
167
  with st.expander("Video Settings"):
168
  video_quality = st.select_slider(
169
  "Video Quality",
170
  options=["low", "medium", "high"],
171
+ value="medium",
172
+ help="Higher quality creates larger files"
173
  )
174
 
175
  # Map quality to bitrate
 
179
  "high": "2000k"
180
  }
181
  bitrate = bitrate_map[video_quality]
182
+
183
+ # Clear cache button
184
+ if st.button("🧹 Clear Cache", help="Remove all cached results to free up disk space"):
185
+ result_cache.clear()
186
+ st.success("Cache cleared successfully!")
187
+
188
+ # About section
189
+ st.markdown("---")
190
+ st.markdown("### 📝 About")
191
+ st.markdown("""
192
+ This app uses AI to convert audio to video.
193
+
194
+ Optimized for Hugging Face Spaces with:
195
+ - Parallel processing
196
+ - Memory-efficient models
197
+ - Result caching
198
+ - Batch processing
199
+ """)
200
+
201
+ # Main content area
202
+ # File uploader with better styling
203
+ st.markdown("### 📁 Upload Your Audio")
204
+ audio_file = st.file_uploader("Select an audio file (WAV, MP3, OGG)", type=["wav", "mp3", "ogg"])
205
 
206
  if audio_file is not None:
207
+ # Display audio player with better styling
208
+ st.markdown("### 🎵 Preview Your Audio")
209
  st.audio(audio_file)
210
 
211
+ # Generate a cache key based on the audio file and settings
212
+ audio_bytes = audio_file.getvalue()
213
+ settings_str = f"{num_segments}_{animation_type}_{image_size}_{inference_steps}_{video_quality}"
214
+ cache_key = hashlib.md5((hashlib.md5(audio_bytes).hexdigest() + settings_str).encode()).hexdigest()
215
+
216
+ # Process button with better styling
217
+ st.markdown("### 🔄 Process Your Audio")
218
+ convert_col, time_col = st.columns([3, 1])
219
+
220
+ with convert_col:
221
+ convert_button = st.button("🎬 Convert to Video", type="primary", use_container_width=True)
222
+
223
+ with time_col:
224
+ st.info("Processing time: ~1-3 minutes")
225
+
226
+ # Check if result is already in cache
227
+ if use_caching and result_cache.exists(cache_key, ".mp4") and convert_button:
228
+ output_video = result_cache.get_cache_path(cache_key, ".mp4")
229
+ st.success("✅ Found cached result! Loading video...")
230
+
231
+ # Display the cached video
232
+ st.markdown("### 🎥 Your Video")
233
+ st.video(output_video)
234
+
235
+ with open(output_video, "rb") as file:
236
+ st.download_button(
237
+ label="📥 Download Video",
238
+ data=file,
239
+ file_name="audio_to_video.mp4",
240
+ mime="video/mp4",
241
+ use_container_width=True
242
+ )
243
+ return
244
+
245
+ if convert_button:
246
+ # Initialize progress tracking with better UI
247
+ progress_container = st.container()
248
+ with progress_container:
249
+ progress_bar = st.progress(0)
250
+ status_text = st.empty()
251
+
252
+ # Add a processing animation
253
+ processing_col1, processing_col2 = st.columns([1, 3])
254
+ with processing_col1:
255
+ st.markdown("### Processing:")
256
+ with processing_col2:
257
+ status_message = st.empty()
258
 
259
  try:
260
  # Step 1: Initialize components
261
  status_text.text("Initializing components...")
262
+ status_message.markdown("🔄 **Setting up AI models...**")
263
  transcriber = get_transcriber()
264
  prompt_generator = get_prompt_generator()
265
  image_generator = get_image_generator()
266
  animator = get_animator()
267
  video_creator = get_video_creator()
268
+
269
+ # Update image generator settings
270
+ image_generator.set_inference_steps(inference_steps)
271
+ image_generator.set_target_size(image_size)
272
+
273
  progress_bar.progress(10)
274
 
275
  # Step 2: Segment and transcribe audio
276
+ status_text.text("Segmenting audio...")
277
+ status_message.markdown("🔊 **Analyzing audio...**")
278
  audio_segments, timestamps = transcriber.segment_audio(audio_file, num_segments=num_segments)
279
+ progress_bar.progress(15)
280
 
281
+ # Transcribe segments in parallel if enabled
282
+ status_text.text("Transcribing audio segments...")
283
+ status_message.markdown("🎤 **Converting speech to text...**")
284
+ if parallel_processing:
285
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
286
+ # Create a partial function with the transcriber
287
+ process_func = partial(process_audio_segment, transcriber=transcriber)
288
+ # Process segments in parallel
289
+ transcriptions = list(executor.map(process_func, audio_segments))
290
+ else:
291
+ transcriptions = [transcriber.transcribe_segment(segment) for segment in audio_segments]
292
+
293
+ # Display transcriptions with better styling
294
  progress_bar.progress(30)
295
+ st.markdown("### 📝 Transcriptions")
296
+ for i, (trans, (start, end)) in enumerate(zip(transcriptions, timestamps)):
297
+ st.markdown(f"""
298
+ <div style="background-color: #f0f2f6; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
299
+ <strong>Segment {i+1} ({start:.1f}s - {end:.1f}s):</strong> {trans}
300
+ </div>
301
+ """, unsafe_allow_html=True)
302
 
303
+ # Step 3: Generate prompts in parallel
304
  status_text.text("Generating prompts from transcriptions...")
305
+ status_message.markdown("✍️ **Creating image descriptions...**")
306
+ if parallel_processing:
307
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
308
+ # Create a partial function with the prompt generator
309
+ prompt_func = partial(generate_prompt_for_segment, prompt_generator=prompt_generator)
310
+ # Generate prompts in parallel
311
+ prompts = list(executor.map(prompt_func, transcriptions))
312
+ else:
313
+ prompts = [prompt_generator.generate_optimized_prompt(trans) for trans in transcriptions]
314
 
315
+ # Display prompts with better styling
 
 
 
316
  progress_bar.progress(40)
317
+ st.markdown("### 🖋️ Generated Prompts")
318
+ for i, prompt in enumerate(prompts):
319
+ st.markdown(f"""
320
+ <div style="background-color: #e8f4f8; padding: 10px; border-radius: 5px; margin-bottom: 10px;">
321
+ <strong>Prompt {i+1}:</strong> {prompt}
322
+ </div>
323
+ """, unsafe_allow_html=True)
324
 
325
+ # Step 4: Generate images in parallel
326
  status_text.text("Generating images from prompts...")
327
+ status_message.markdown("🎨 **Creating images...**")
328
+ if parallel_processing:
329
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
330
+ # Create a partial function with the image generator
331
+ image_func = partial(generate_image_for_prompt, image_generator=image_generator)
332
+ # Generate images in parallel
333
+ images = list(executor.map(image_func, prompts))
334
+ else:
335
+ images = []
336
+ for i, prompt in enumerate(prompts):
337
+ status_text.text(f"Generating image {i+1}/{len(prompts)}...")
338
+ images.append(image_generator.generate_image(prompt))
339
 
340
+ # Display images with better styling
 
 
 
 
341
  progress_bar.progress(60)
342
+ st.markdown("### 🖼️ Generated Images")
343
+ image_cols = st.columns(min(len(images), 3))
344
+ for i, img_path in enumerate(images):
345
+ with image_cols[i % len(image_cols)]:
346
+ st.image(img_path, caption=f"Image {i+1}", use_column_width=True)
347
 
348
+ # Step 5: Add animations in parallel
349
  status_text.text("Adding animations to images...")
350
+ status_message.markdown("✨ **Adding animations...**")
351
+ if parallel_processing:
352
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
353
+ # Create a partial function with the animator and animation type
354
+ animate_func = partial(animate_image, animator=animator, animation_type=animation_type)
355
+ # Animate images in parallel
356
+ animated_frames = list(executor.map(animate_func, images))
357
+ else:
358
+ animated_frames = []
359
+ for i, img_path in enumerate(images):
360
+ status_text.text(f"Animating image {i+1}/{len(images)}...")
361
+ animated_frames.append(animator.animate_single_image(img_path, animation_type))
362
+
363
  progress_bar.progress(80)
364
 
365
  # Step 6: Create video
366
  status_text.text("Creating final video...")
367
+ status_message.markdown("🎬 **Assembling video...**")
368
  output_video = video_creator.create_video_from_frames(
369
  animated_frames,
370
  audio_file,
371
  segments=transcriptions,
372
+ timestamps=timestamps,
373
+ parallel=parallel_processing,
374
+ max_workers=max_workers
375
  )
376
 
377
  # Optimize video if needed
378
  if video_quality != "high":
379
  status_text.text("Optimizing video for web...")
380
+ status_message.markdown("⚙️ **Optimizing video...**")
381
  output_video = video_creator.optimize_video(
382
  output_video,
383
  target_size=(640, 480) if video_quality == "low" else (854, 480),
384
+ bitrate=bitrate,
385
+ threads=max_workers
386
  )
387
 
388
+ # Cache the result if caching is enabled
389
+ if use_caching:
390
+ import shutil
391
+ cached_path = result_cache.get_cache_path(cache_key, ".mp4")
392
+ shutil.copy(output_video, cached_path)
393
+
394
  progress_bar.progress(100)
395
  status_text.text("Video creation complete!")
396
+ status_message.markdown("✅ **Done!**")
397
 
398
+ # Step 7: Display and provide download link with better styling
399
+ st.markdown("### 🎥 Your Video")
400
  st.video(output_video)
401
 
402
+ st.markdown("### 📥 Download")
403
  with open(output_video, "rb") as file:
404
  st.download_button(
405
+ label="📥 Download Video",
406
  data=file,
407
  file_name="audio_to_video.mp4",
408
+ mime="video/mp4",
409
+ use_container_width=True
410
  )
411
 
412
+ # Performance metrics
413
+ st.markdown("### ⏱️ Performance Metrics")
414
+ st.info(f"""
415
+ - Parallel Processing: {'Enabled' if parallel_processing else 'Disabled'}
416
+ - Workers: {max_workers}
417
+ - Image Size: {image_size[0]}x{image_size[1]}
418
+ - Inference Steps: {inference_steps}
419
+ - Video Quality: {video_quality.capitalize()}
420
+ """)
421
+
422
  # Clean up temporary files
423
  status_text.text("Cleaning up temporary files...")
424
  for path in images + [p for frames in animated_frames for p in frames]:
 
433
  except Exception as e:
434
  st.error(f"An error occurred: {str(e)}")
435
  st.exception(e)
436
+
437
+ # Provide troubleshooting tips
438
+ st.markdown("### 🔧 Troubleshooting Tips")
439
+ st.info("""
440
+ - Try reducing the number of segments
441
+ - Use a smaller image size
442
+ - Reduce inference steps
443
+ - Make sure your audio file is in a supported format
444
+ - Clear the cache and try again
445
+ """)
446
 
447
  if __name__ == "__main__":
448
  main()
image_generator.py CHANGED
@@ -1,19 +1,25 @@
1
  import streamlit as st
2
  import torch
3
- from diffusers import StableDiffusionPipeline
4
- from PIL import Image
5
  import os
 
 
6
  import time
 
 
7
 
8
  class ImageGenerator:
9
  def __init__(self):
10
  self.model = None
 
 
11
 
12
  def load_model(self):
13
  """Load a lightweight image generation model"""
14
  if self.model is None:
15
  with st.spinner("Loading image generation model... This may take a moment."):
16
  # Using a lightweight model for image generation
 
 
17
  model_id = "runwayml/stable-diffusion-v1-5"
18
 
19
  # Load with memory optimization settings
@@ -31,9 +37,53 @@ class ImageGenerator:
31
  if hasattr(self.model, 'enable_attention_slicing'):
32
  self.model.enable_attention_slicing()
33
 
 
 
 
 
 
 
 
 
 
 
 
34
  return self.model
35
 
36
- def generate_images(self, prompts, output_dir="temp", progress_callback=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  """Generate images from the prompts"""
38
  # Load the model if not already loaded
39
  model = self.load_model()
@@ -41,30 +91,34 @@ class ImageGenerator:
41
  # Ensure output directory exists
42
  os.makedirs(output_dir, exist_ok=True)
43
 
44
- images = []
45
- for i, prompt in enumerate(prompts):
46
- if progress_callback:
47
- progress_callback(f"Generating image {i+1}/{len(prompts)}...")
48
-
49
- # Generate image with minimal inference steps to save resources
50
- image = model(
51
- prompt,
52
- num_inference_steps=20, # Reduced steps for speed
53
- guidance_scale=7.5 # Standard guidance scale
54
- ).images[0]
55
-
56
- # Save the image
57
- image_path = f"{output_dir}/image_{i}.png"
58
- image.save(image_path)
59
- images.append(image_path)
60
-
61
- # Small delay to prevent resource exhaustion
62
- time.sleep(0.5)
 
63
 
64
  return images
65
 
66
- def optimize_image(self, image_path, target_size=(512, 512)):
67
  """Optimize image size for video creation"""
 
 
 
68
  img = Image.open(image_path)
69
 
70
  # Resize to target size
@@ -75,11 +129,69 @@ class ImageGenerator:
75
 
76
  return image_path
77
 
78
- def optimize_all_images(self, image_paths, target_size=(512, 512)):
79
  """Optimize all images for video creation"""
80
- optimized_paths = []
81
- for path in image_paths:
82
- optimized_path = self.optimize_image(path, target_size)
83
- optimized_paths.append(optimized_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  return optimized_paths
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import torch
 
 
3
  import os
4
+ import numpy as np
5
+ from PIL import Image
6
  import time
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ from functools import partial
9
 
10
  class ImageGenerator:
11
  def __init__(self):
12
  self.model = None
13
+ self.inference_steps = 20
14
+ self.target_size = (384, 384)
15
 
16
  def load_model(self):
17
  """Load a lightweight image generation model"""
18
  if self.model is None:
19
  with st.spinner("Loading image generation model... This may take a moment."):
20
  # Using a lightweight model for image generation
21
+ from diffusers import StableDiffusionPipeline
22
+
23
  model_id = "runwayml/stable-diffusion-v1-5"
24
 
25
  # Load with memory optimization settings
 
37
  if hasattr(self.model, 'enable_attention_slicing'):
38
  self.model.enable_attention_slicing()
39
 
40
+ # Enable memory efficient attention
41
+ if hasattr(self.model, 'enable_vae_slicing'):
42
+ self.model.enable_vae_slicing()
43
+
44
+ # Enable xformers memory efficient attention if available
45
+ try:
46
+ if hasattr(self.model, 'enable_xformers_memory_efficient_attention'):
47
+ self.model.enable_xformers_memory_efficient_attention()
48
+ except:
49
+ pass
50
+
51
  return self.model
52
 
53
+ def set_inference_steps(self, steps):
54
+ """Set the number of inference steps"""
55
+ self.inference_steps = steps
56
+
57
+ def set_target_size(self, size):
58
+ """Set the target image size"""
59
+ self.target_size = size
60
+
61
+ def generate_image(self, prompt, output_dir="temp"):
62
+ """Generate a single image from a prompt"""
63
+ # Load the model if not already loaded
64
+ model = self.load_model()
65
+
66
+ # Ensure output directory exists
67
+ os.makedirs(output_dir, exist_ok=True)
68
+
69
+ # Generate image with minimal inference steps to save resources
70
+ image = model(
71
+ prompt,
72
+ num_inference_steps=self.inference_steps,
73
+ guidance_scale=7.5
74
+ ).images[0]
75
+
76
+ # Resize to target size for consistency and performance
77
+ if image.size != self.target_size:
78
+ image = image.resize(self.target_size, Image.LANCZOS)
79
+
80
+ # Save the image
81
+ image_path = f"{output_dir}/image_{int(time.time() * 1000)}.png"
82
+ image.save(image_path)
83
+
84
+ return image_path
85
+
86
+ def generate_images(self, prompts, output_dir="temp", progress_callback=None, parallel=False, max_workers=4):
87
  """Generate images from the prompts"""
88
  # Load the model if not already loaded
89
  model = self.load_model()
 
91
  # Ensure output directory exists
92
  os.makedirs(output_dir, exist_ok=True)
93
 
94
+ if parallel and len(prompts) > 1:
95
+ # Generate images in parallel
96
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
97
+ # Create a partial function with fixed parameters
98
+ generate_func = partial(self.generate_image, output_dir=output_dir)
99
+
100
+ # Process prompts in parallel and collect results
101
+ if progress_callback:
102
+ progress_callback("Generating images in parallel...")
103
+
104
+ images = list(executor.map(generate_func, prompts))
105
+ else:
106
+ # Generate images sequentially
107
+ images = []
108
+ for i, prompt in enumerate(prompts):
109
+ if progress_callback:
110
+ progress_callback(f"Generating image {i+1}/{len(prompts)}...")
111
+
112
+ image_path = self.generate_image(prompt, output_dir)
113
+ images.append(image_path)
114
 
115
  return images
116
 
117
+ def optimize_image(self, image_path, target_size=None):
118
  """Optimize image size for video creation"""
119
+ if target_size is None:
120
+ target_size = self.target_size
121
+
122
  img = Image.open(image_path)
123
 
124
  # Resize to target size
 
129
 
130
  return image_path
131
 
132
+ def optimize_all_images(self, image_paths, target_size=None, parallel=False, max_workers=4):
133
  """Optimize all images for video creation"""
134
+ if target_size is None:
135
+ target_size = self.target_size
136
+
137
+ if parallel and len(image_paths) > 1:
138
+ # Optimize images in parallel
139
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
140
+ # Create a partial function with fixed parameters
141
+ optimize_func = partial(self.optimize_image, target_size=target_size)
142
+
143
+ # Process images in parallel
144
+ optimized_paths = list(executor.map(optimize_func, image_paths))
145
+ else:
146
+ # Optimize images sequentially
147
+ optimized_paths = []
148
+ for path in image_paths:
149
+ optimized_path = self.optimize_image(path, target_size)
150
+ optimized_paths.append(optimized_path)
151
 
152
  return optimized_paths
153
+
154
+ def batch_generate_images(self, prompts, batch_size=2, output_dir="temp", progress_callback=None):
155
+ """Generate images in batches to optimize memory usage"""
156
+ # Load the model if not already loaded
157
+ model = self.load_model()
158
+
159
+ # Ensure output directory exists
160
+ os.makedirs(output_dir, exist_ok=True)
161
+
162
+ images = []
163
+
164
+ # Process prompts in batches
165
+ for i in range(0, len(prompts), batch_size):
166
+ batch_prompts = prompts[i:i+batch_size]
167
+
168
+ if progress_callback:
169
+ progress_callback(f"Generating batch {i//batch_size + 1}/{(len(prompts) + batch_size - 1)//batch_size}...")
170
+
171
+ # Generate images for this batch
172
+ batch_images = []
173
+ for j, prompt in enumerate(batch_prompts):
174
+ # Generate image
175
+ image = model(
176
+ prompt,
177
+ num_inference_steps=self.inference_steps,
178
+ guidance_scale=7.5
179
+ ).images[0]
180
+
181
+ # Resize to target size
182
+ if image.size != self.target_size:
183
+ image = image.resize(self.target_size, Image.LANCZOS)
184
+
185
+ # Save the image
186
+ image_path = f"{output_dir}/image_{i+j}_{int(time.time() * 1000)}.png"
187
+ image.save(image_path)
188
+ batch_images.append(image_path)
189
+
190
+ # Add batch results to overall results
191
+ images.extend(batch_images)
192
+
193
+ # Clear CUDA cache if using GPU
194
+ if torch.cuda.is_available():
195
+ torch.cuda.empty_cache()
196
+
197
+ return images
prompt_generator.py CHANGED
@@ -5,6 +5,7 @@ from transformers import pipeline
5
  class PromptGenerator:
6
  def __init__(self):
7
  self.model = None
 
8
 
9
  def load_model(self):
10
  """Load a lightweight text generation model"""
@@ -14,6 +15,37 @@ class PromptGenerator:
14
  self.model = pipeline("text-generation", model="distilgpt2")
15
  return self.model
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def generate_prompts(self, text, num_segments=5):
18
  """Generate image prompts from the transcription"""
19
  # Load the model if not already loaded
@@ -50,25 +82,24 @@ class PromptGenerator:
50
 
51
  return prompts, segments
52
 
53
- def generate_optimized_prompts(self, transcriptions, timestamps=None):
54
- """Generate optimized prompts from transcribed segments"""
 
 
 
55
  model = self.load_model()
56
 
57
- prompts = []
58
- for i, transcription in enumerate(transcriptions):
59
- # Skip empty transcriptions
60
- if not transcription.strip():
61
- continue
62
-
63
- # Create a prompt template focused on visual elements
64
- template = f"Describe a visual scene for: '{transcription}'"
65
-
66
- # Generate with minimal tokens to save resources
67
- result = model(template, max_length=30, num_return_sequences=1)
68
- generated_text = result[0]['generated_text'].replace(template, "").strip()
69
-
70
- # Create an optimized prompt with style keywords
71
- prompt = f"{transcription} {generated_text}, detailed, vibrant, cinematic"
72
- prompts.append(prompt)
73
 
74
  return prompts
 
 
 
 
 
 
5
  class PromptGenerator:
6
  def __init__(self):
7
  self.model = None
8
+ self.prompt_cache = {}
9
 
10
  def load_model(self):
11
  """Load a lightweight text generation model"""
 
15
  self.model = pipeline("text-generation", model="distilgpt2")
16
  return self.model
17
 
18
+ def generate_optimized_prompt(self, transcription):
19
+ """Generate an optimized prompt from a single transcription"""
20
+ # Check cache first
21
+ import hashlib
22
+ cache_key = hashlib.md5(transcription.encode()).hexdigest()
23
+
24
+ if cache_key in self.prompt_cache:
25
+ return self.prompt_cache[cache_key]
26
+
27
+ # Load the model if not already loaded
28
+ model = self.load_model()
29
+
30
+ # Skip empty transcriptions
31
+ if not transcription.strip():
32
+ return ""
33
+
34
+ # Create a prompt template focused on visual elements
35
+ template = f"Describe a visual scene for: '{transcription}'"
36
+
37
+ # Generate with minimal tokens to save resources
38
+ result = model(template, max_length=30, num_return_sequences=1)
39
+ generated_text = result[0]['generated_text'].replace(template, "").strip()
40
+
41
+ # Create an optimized prompt with style keywords
42
+ prompt = f"{transcription} {generated_text}, detailed, vibrant, cinematic"
43
+
44
+ # Cache the result
45
+ self.prompt_cache[cache_key] = prompt
46
+
47
+ return prompt
48
+
49
  def generate_prompts(self, text, num_segments=5):
50
  """Generate image prompts from the transcription"""
51
  # Load the model if not already loaded
 
82
 
83
  return prompts, segments
84
 
85
+ def generate_optimized_prompts(self, transcriptions, parallel=False, max_workers=4):
86
+ """Generate optimized prompts from transcribed segments with parallel processing"""
87
+ import concurrent.futures
88
+
89
+ # Load the model
90
  model = self.load_model()
91
 
92
+ if parallel and len(transcriptions) > 1:
93
+ # Process in parallel
94
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
95
+ prompts = list(executor.map(self.generate_optimized_prompt, transcriptions))
96
+ else:
97
+ # Process sequentially
98
+ prompts = [self.generate_optimized_prompt(trans) for trans in transcriptions]
 
 
 
 
 
 
 
 
 
99
 
100
  return prompts
101
+
102
+ def clear_cache(self):
103
+ """Clear the prompt cache"""
104
+ self.prompt_cache = {}
105
+ return True
requirements.txt CHANGED
@@ -4,7 +4,7 @@ torch --extra-index-url https://download.pytorch.org/whl/cpu
4
  torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
5
  diffusers
6
  accelerate
7
- moviepy==1.0.3
8
  librosa
9
  soundfile
10
  numpy
 
4
  torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
5
  diffusers
6
  accelerate
7
+ moviepy
8
  librosa
9
  soundfile
10
  numpy
transcriber.py CHANGED
@@ -1,16 +1,18 @@
1
  import streamlit as st
2
- import os
3
- import tempfile
4
  import torch
 
5
  import librosa
6
  import numpy as np
7
- from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 
 
8
 
9
  class AudioTranscriber:
10
  def __init__(self):
11
  self.model = None
12
  self.processor = None
13
  self.pipe = None
 
14
 
15
  def load_model(self):
16
  """Load a lightweight transcription model"""
@@ -47,6 +49,14 @@ class AudioTranscriber:
47
 
48
  def transcribe(self, audio_file):
49
  """Transcribe the audio file using the loaded model"""
 
 
 
 
 
 
 
 
50
  # Load the model if not already loaded
51
  pipe = self.load_model()
52
 
@@ -63,6 +73,9 @@ class AudioTranscriber:
63
  result = pipe(y)
64
  transcription = result["text"]
65
 
 
 
 
66
  return transcription
67
  finally:
68
  # Clean up temporary file
@@ -109,13 +122,31 @@ class AudioTranscriber:
109
  if os.path.exists(tmp_path):
110
  os.unlink(tmp_path)
111
 
112
- def transcribe_segments(self, segments):
113
- """Transcribe individual audio segments"""
 
 
 
 
 
 
114
  pipe = self.load_model()
115
 
116
- transcriptions = []
117
- for segment in segments:
118
- result = pipe(segment)
119
- transcriptions.append(result["text"])
 
 
 
 
 
 
 
120
 
121
  return transcriptions
 
 
 
 
 
 
1
  import streamlit as st
 
 
2
  import torch
3
+ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
4
  import librosa
5
  import numpy as np
6
+ import tempfile
7
+ import os
8
+ from concurrent.futures import ThreadPoolExecutor
9
 
10
  class AudioTranscriber:
11
  def __init__(self):
12
  self.model = None
13
  self.processor = None
14
  self.pipe = None
15
+ self.transcription_cache = {}
16
 
17
  def load_model(self):
18
  """Load a lightweight transcription model"""
 
49
 
50
  def transcribe(self, audio_file):
51
  """Transcribe the audio file using the loaded model"""
52
+ # Generate a cache key based on the audio file
53
+ import hashlib
54
+ cache_key = hashlib.md5(audio_file.getvalue()).hexdigest()
55
+
56
+ # Check if result is in cache
57
+ if cache_key in self.transcription_cache:
58
+ return self.transcription_cache[cache_key]
59
+
60
  # Load the model if not already loaded
61
  pipe = self.load_model()
62
 
 
73
  result = pipe(y)
74
  transcription = result["text"]
75
 
76
+ # Cache the result
77
+ self.transcription_cache[cache_key] = transcription
78
+
79
  return transcription
80
  finally:
81
  # Clean up temporary file
 
122
  if os.path.exists(tmp_path):
123
  os.unlink(tmp_path)
124
 
125
+ def transcribe_segment(self, segment):
126
+ """Transcribe a single audio segment"""
127
+ pipe = self.load_model()
128
+ result = pipe(segment)
129
+ return result["text"]
130
+
131
+ def transcribe_segments(self, segments, parallel=False, max_workers=4):
132
+ """Transcribe individual audio segments with optional parallel processing"""
133
  pipe = self.load_model()
134
 
135
+ if parallel and len(segments) > 1:
136
+ # Process in parallel using ThreadPoolExecutor
137
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
138
+ # Process segments in parallel
139
+ transcriptions = list(executor.map(self.transcribe_segment, segments))
140
+ else:
141
+ # Process sequentially
142
+ transcriptions = []
143
+ for segment in segments:
144
+ result = pipe(segment)
145
+ transcriptions.append(result["text"])
146
 
147
  return transcriptions
148
+
149
+ def clear_cache(self):
150
+ """Clear the transcription cache"""
151
+ self.transcription_cache = {}
152
+ return True
video_creator.py CHANGED
@@ -3,14 +3,54 @@ import os
3
  import tempfile
4
  from moviepy.editor import ImageSequenceClip, AudioFileClip, concatenate_videoclips, TextClip, CompositeVideoClip
5
  import numpy as np
 
 
6
 
7
  class VideoCreator:
8
  def __init__(self):
9
  # Ensure output directory exists
10
  os.makedirs("outputs", exist_ok=True)
 
11
 
12
- def create_video_from_frames(self, animated_frames, audio_file, segments=None, timestamps=None, output_dir="outputs"):
13
- """Create a video from animated frames synchronized with audio"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Save the uploaded audio to a temporary file
15
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
16
  tmp_file.write(audio_file.getvalue())
@@ -32,37 +72,26 @@ class VideoCreator:
32
  # Create video clips for each animated segment
33
  video_clips = []
34
 
35
- for i, frames in enumerate(animated_frames):
36
- # Calculate frame duration based on segment duration
37
- segment_duration = segment_durations[min(i, len(segment_durations)-1)]
38
- frame_duration = segment_duration / len(frames)
39
-
40
- # Create a clip from the frames
41
- segment_clip = ImageSequenceClip(frames, durations=[frame_duration] * len(frames))
42
-
43
- # Add text overlay if segments are provided
44
- if segments and i < len(segments):
45
- segment_text = segments[i]
46
 
47
- # Create a simple text overlay using a workaround since TextClip might be resource-intensive
48
- # This is a simplified approach - in production, you'd use TextClip properly
49
- try:
50
- txt_clip = TextClip(
51
- segment_text,
52
- fontsize=24,
53
- color='white',
54
- bg_color='rgba(0,0,0,0.5)',
55
- size=(segment_clip.w, None),
56
- method='caption'
57
- ).set_duration(segment_clip.duration)
58
-
59
- txt_clip = txt_clip.set_position(('center', 'bottom'))
60
- segment_clip = CompositeVideoClip([segment_clip, txt_clip])
61
- except Exception as e:
62
- # If TextClip fails, continue without text overlay
63
- st.warning(f"Could not add text overlay: {e}")
64
-
65
- video_clips.append(segment_clip)
66
 
67
  # Concatenate all clips
68
  final_clip = concatenate_videoclips(video_clips)
@@ -71,7 +100,7 @@ class VideoCreator:
71
  final_clip = final_clip.set_audio(audio_clip)
72
 
73
  # Write the result to a file
74
- output_path = f"{output_dir}/output_video.mp4"
75
 
76
  # Use lower resolution and bitrate for faster processing
77
  final_clip.write_videofile(
@@ -80,10 +109,13 @@ class VideoCreator:
80
  codec='libx264',
81
  audio_codec='aac',
82
  preset='ultrafast', # Faster encoding
83
- threads=2, # Limit threads to save resources
84
  bitrate='1000k' # Lower bitrate
85
  )
86
 
 
 
 
87
  return output_path
88
 
89
  finally:
@@ -91,7 +123,7 @@ class VideoCreator:
91
  if os.path.exists(audio_path):
92
  os.unlink(audio_path)
93
 
94
- def optimize_video(self, video_path, target_size=(640, 480), bitrate='1000k'):
95
  """Optimize video size and quality for web delivery"""
96
  from moviepy.editor import VideoFileClip
97
 
@@ -102,13 +134,13 @@ class VideoCreator:
102
  clip_resized = clip.resize(target_size)
103
 
104
  # Save optimized video
105
- optimized_path = video_path.replace('.mp4', '_optimized.mp4')
106
  clip_resized.write_videofile(
107
  optimized_path,
108
  codec='libx264',
109
  audio_codec='aac',
110
  preset='ultrafast',
111
- threads=2,
112
  bitrate=bitrate
113
  )
114
 
@@ -117,3 +149,8 @@ class VideoCreator:
117
  clip_resized.close()
118
 
119
  return optimized_path
 
 
 
 
 
 
3
  import tempfile
4
  from moviepy.editor import ImageSequenceClip, AudioFileClip, concatenate_videoclips, TextClip, CompositeVideoClip
5
  import numpy as np
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ import time
8
 
9
  class VideoCreator:
10
  def __init__(self):
11
  # Ensure output directory exists
12
  os.makedirs("outputs", exist_ok=True)
13
+ self.video_cache = {}
14
 
15
+ def create_segment_clip(self, frames, segment_duration, segment_text=None):
16
+ """Create a video clip from frames with optional text overlay"""
17
+ # Calculate frame duration based on segment duration
18
+ frame_duration = segment_duration / len(frames)
19
+
20
+ # Create a clip from the frames
21
+ segment_clip = ImageSequenceClip(frames, durations=[frame_duration] * len(frames))
22
+
23
+ # Add text overlay if segment text is provided
24
+ if segment_text:
25
+ try:
26
+ txt_clip = TextClip(
27
+ segment_text,
28
+ fontsize=24,
29
+ color='white',
30
+ bg_color='rgba(0,0,0,0.5)',
31
+ size=(segment_clip.w, None),
32
+ method='caption'
33
+ ).set_duration(segment_clip.duration)
34
+
35
+ txt_clip = txt_clip.set_position(('center', 'bottom'))
36
+ segment_clip = CompositeVideoClip([segment_clip, txt_clip])
37
+ except Exception as e:
38
+ # If TextClip fails, continue without text overlay
39
+ st.warning(f"Could not add text overlay: {e}")
40
+
41
+ return segment_clip
42
+
43
+ def create_video_from_frames(self, animated_frames, audio_file, segments=None, timestamps=None,
44
+ output_dir="outputs", parallel=False, max_workers=4):
45
+ """Create a video from animated frames synchronized with audio using parallel processing"""
46
+ # Generate a cache key based on inputs
47
+ import hashlib
48
+ cache_key = f"{hashlib.md5(audio_file.getvalue()).hexdigest()}_{len(animated_frames)}"
49
+
50
+ # Check if result is in cache
51
+ if cache_key in self.video_cache:
52
+ return self.video_cache[cache_key]
53
+
54
  # Save the uploaded audio to a temporary file
55
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
56
  tmp_file.write(audio_file.getvalue())
 
72
  # Create video clips for each animated segment
73
  video_clips = []
74
 
75
+ if parallel and len(animated_frames) > 1:
76
+ # Process segments in parallel
77
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
78
+ # Prepare arguments for parallel processing
79
+ args = []
80
+ for i, frames in enumerate(animated_frames):
81
+ segment_duration = segment_durations[min(i, len(segment_durations)-1)]
82
+ segment_text = segments[i] if segments and i < len(segments) else None
83
+ args.append((frames, segment_duration, segment_text))
 
 
84
 
85
+ # Process in parallel
86
+ video_clips = list(executor.map(lambda x: self.create_segment_clip(*x), args))
87
+ else:
88
+ # Process segments sequentially
89
+ for i, frames in enumerate(animated_frames):
90
+ segment_duration = segment_durations[min(i, len(segment_durations)-1)]
91
+ segment_text = segments[i] if segments and i < len(segments) else None
92
+
93
+ segment_clip = self.create_segment_clip(frames, segment_duration, segment_text)
94
+ video_clips.append(segment_clip)
 
 
 
 
 
 
 
 
 
95
 
96
  # Concatenate all clips
97
  final_clip = concatenate_videoclips(video_clips)
 
100
  final_clip = final_clip.set_audio(audio_clip)
101
 
102
  # Write the result to a file
103
+ output_path = f"{output_dir}/output_video_{int(time.time())}.mp4"
104
 
105
  # Use lower resolution and bitrate for faster processing
106
  final_clip.write_videofile(
 
109
  codec='libx264',
110
  audio_codec='aac',
111
  preset='ultrafast', # Faster encoding
112
+ threads=max_workers, # Use multiple threads for encoding
113
  bitrate='1000k' # Lower bitrate
114
  )
115
 
116
+ # Cache the result
117
+ self.video_cache[cache_key] = output_path
118
+
119
  return output_path
120
 
121
  finally:
 
123
  if os.path.exists(audio_path):
124
  os.unlink(audio_path)
125
 
126
+ def optimize_video(self, video_path, target_size=(640, 480), bitrate='1000k', threads=2):
127
  """Optimize video size and quality for web delivery"""
128
  from moviepy.editor import VideoFileClip
129
 
 
134
  clip_resized = clip.resize(target_size)
135
 
136
  # Save optimized video
137
+ optimized_path = video_path.replace('.mp4', f'_optimized_{int(time.time())}.mp4')
138
  clip_resized.write_videofile(
139
  optimized_path,
140
  codec='libx264',
141
  audio_codec='aac',
142
  preset='ultrafast',
143
+ threads=threads,
144
  bitrate=bitrate
145
  )
146
 
 
149
  clip_resized.close()
150
 
151
  return optimized_path
152
+
153
+ def clear_cache(self):
154
+ """Clear the video cache"""
155
+ self.video_cache = {}
156
+ return True