citoreh committed on
Commit
28fc925
·
verified ·
1 Parent(s): eab45d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -54
app.py CHANGED
@@ -7,10 +7,19 @@ import gradio as gr
7
  import tempfile
8
  import os
9
  import soundfile as sf
10
- from moviepy.editor import *
11
  import warnings
12
  warnings.filterwarnings('ignore')
13
 
 
 
 
 
 
 
 
 
 
 
14
  # Import required models
15
  from transformers import BlipProcessor, BlipForConditionalGeneration
16
  from transformers import MusicgenForConditionalGeneration, AutoProcessor
@@ -157,60 +166,60 @@ class PhotoVideoSoundtrackGenerator:
157
 
158
  return audio_data, sampling_rate
159
 
160
def create_video_effects(self, image, duration=30):
    """Build the per-frame visual effects used for the video.

    Args:
        image: Source image. Not used by the effects themselves; the
            parameter is kept so existing callers keep working.
        duration: Length in seconds over which the zoom reaches its maximum
            and the pan completes one full cycle. Must be non-zero, since
            both effects divide by it.

    Returns:
        A two-item list ``[zoom_effect, pan_effect]``. Each item is a
        callable ``effect(get_frame, t)`` that reads the frame with
        ``get_frame(t)`` and returns an array with the same height and width.
        NOTE(review): this signature presumably targets MoviePy's ``clip.fl``
        -- confirm against the caller.
    """

    def zoom_effect(get_frame, t):
        """Slow zoom-in: scale up to +30% at ``t == duration``, then center-crop."""
        frame = get_frame(t)
        zoom_factor = 1 + (t / duration) * 0.3
        h, w = frame.shape[:2]
        new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)

        # Upscale, then cut the central h x w window so the output keeps
        # the original frame size.
        resized = np.array(
            Image.fromarray(frame).resize((new_w, new_h), Image.Resampling.LANCZOS)
        )
        start_y = (new_h - h) // 2
        start_x = (new_w - w) // 2
        return resized[start_y:start_y + h, start_x:start_x + w]

    def pan_effect(get_frame, t):
        """Subtle pan: shift the frame along a small elliptical path."""
        frame = get_frame(t)
        h, w = frame.shape[:2]

        # Horizontal amplitude is 5% of the smaller dimension; vertical
        # amplitude is half of that.
        max_offset = min(w, h) * 0.05
        offset_x = int(max_offset * np.sin(2 * np.pi * t / duration))
        offset_y = int(max_offset * 0.5 * np.cos(2 * np.pi * t / duration))

        # Copy the overlapping region into a zero-filled canvas; the strip
        # uncovered by the shift stays black.
        shifted = np.zeros_like(frame)

        src_start_x = max(0, -offset_x)
        src_end_x = min(w, w - offset_x)
        src_start_y = max(0, -offset_y)
        src_end_y = min(h, h - offset_y)

        dst_start_x = max(0, offset_x)
        dst_end_x = min(w, w + offset_x)
        dst_start_y = max(0, offset_y)
        dst_end_y = min(h, h + offset_y)

        shifted[dst_start_y:dst_end_y, dst_start_x:dst_end_x] = (
            frame[src_start_y:src_end_y, src_start_x:src_end_x]
        )
        return shifted

    return [zoom_effect, pan_effect]
211
 
212
  def create_video(self, image, audio_data, sampling_rate, description, duration=30):
213
  """Create a video combining the image with the soundtrack"""
 
 
 
 
214
  print("🎬 Creating video with visual effects...")
215
 
216
  # Create temporary files
@@ -265,16 +274,18 @@ class PhotoVideoSoundtrackGenerator:
265
  with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as video_file:
266
  output_path = video_file.name
267
 
268
- # Write video file
269
  final_clip.write_videofile(
270
  output_path,
271
  fps=24,
272
  codec='libx264',
273
  audio_codec='aac',
 
274
  temp_audiofile='temp-audio.m4a',
275
  remove_temp=True,
276
  verbose=False,
277
- logger=None
 
278
  )
279
 
280
  # Cleanup
@@ -320,22 +331,41 @@ class PhotoVideoSoundtrackGenerator:
320
  sf.write(audio_file.name, audio_data, sampling_rate)
321
  audio_path = audio_file.name
322
 
323
- progress(0.7, desc="Creating video...")
324
-
325
- # Create video
326
- video_path = self.create_video(image, audio_data, sampling_rate, description, duration=30)
 
 
 
 
 
 
 
 
 
327
 
328
  progress(1.0, desc="Complete!")
329
 
 
 
 
 
 
 
 
330
  return (
331
  video_path,
332
- f"**Image Description:** {description}\n\n**Music Style:** {music_prompt}",
333
  (sampling_rate, audio_data),
334
  audio_path
335
  )
336
 
337
  except Exception as e:
338
- return None, f"Error: {str(e)}", None, None
 
 
 
339
 
340
  # Initialize the generator
341
  print("🚀 Starting Photo-to-Video-Soundtrack Generator...")
@@ -364,6 +394,15 @@ def create_interface():
364
  </div>
365
  """)
366
 
 
 
 
 
 
 
 
 
 
367
  with gr.Row():
368
  with gr.Column(scale=1):
369
  gr.Markdown("### 📤 Upload Your Photo")
@@ -455,14 +494,33 @@ transformers>=4.30.0
455
  accelerate>=0.20.0
456
  scipy>=1.10.0
457
  soundfile>=0.12.0
458
- gradio>=4.0.0
459
- moviepy>=1.0.3
460
  Pillow>=9.5.0
461
  numpy>=1.24.0
 
 
 
 
 
 
 
 
462
  """
463
 
464
  # README.md content (create this as a separate file):
465
  """
 
 
 
 
 
 
 
 
 
 
 
 
466
  # 📸🎵 AI Photo to Video Soundtrack Generator
467
 
468
  Transform your photos into cinematic videos with AI-generated soundtracks!
@@ -498,5 +556,21 @@ The AI recognizes and creates appropriate music for:
498
  - **MoviePy**: For video creation and effects
499
  - **Gradio**: For the user interface
500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  Enjoy creating your AI-powered videos! 🎬✨
502
  """
 
7
  import tempfile
8
  import os
9
  import soundfile as sf
 
10
  import warnings
11
  warnings.filterwarnings('ignore')
12
 
13
+ # Try to import MoviePy with fallback
14
+ try:
15
+ from moviepy.editor import *
16
+ MOVIEPY_AVAILABLE = True
17
+ print("✅ MoviePy imported successfully")
18
+ except ImportError as e:
19
+ print(f"⚠️ MoviePy import failed: {e}")
20
+ print("📹 Video generation will be disabled, but audio generation will still work")
21
+ MOVIEPY_AVAILABLE = False
22
+
23
  # Import required models
24
  from transformers import BlipProcessor, BlipForConditionalGeneration
25
  from transformers import MusicgenForConditionalGeneration, AutoProcessor
 
166
 
167
  return audio_data, sampling_rate
168
 
169
def create_simple_video_fallback(self, image, audio_data, sampling_rate, duration=30):
    """Create a simple zoom-in video with imageio as a fallback for MoviePy.

    The clip is video-only: ``audio_data`` and ``sampling_rate`` are accepted
    for signature parity with ``create_video`` but are not muxed into the
    file; the audio is saved separately by the caller.

    Args:
        image: Source picture, either a PIL image or an array accepted by
            ``Image.fromarray``.
        audio_data: Unused (see above).
        sampling_rate: Unused (see above).
        duration: Clip length in seconds; may be a float.

    Returns:
        Path to the generated temporary ``.mp4`` file, or ``None`` when
        anything fails (including imageio being unavailable). Failures are
        reported on stdout rather than raised, because this is a best-effort
        fallback.
    """
    temp_video_path = None
    try:
        import imageio

        print("🎬 Creating simple video using fallback method...")

        fps = 24
        # int() so a float duration does not break range().
        total_frames = int(fps * duration)
        if total_frames <= 0:
            raise ValueError(f"duration must be positive, got {duration!r}")

        # Normalize once: the encoder expects uniform RGB frames, and
        # palette/grayscale/other modes would otherwise reach it raw.
        base_image = image if isinstance(image, Image.Image) else Image.fromarray(np.asarray(image))
        base_image = base_image.convert("RGB")
        w, h = base_image.size

        # Create temporary video file
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
            temp_video_path = temp_video.name

        # Stream frames to the writer instead of buffering every
        # full-resolution frame in memory before encoding.
        with imageio.get_writer(temp_video_path, fps=fps) as writer:
            for i in range(total_frames):
                # Simple zoom effect: grow linearly to +20% over the clip.
                zoom_factor = 1.0 + 0.2 * (i / total_frames)
                new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
                resized = np.asarray(
                    base_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
                )

                # Center crop back to original size
                start_y = (new_h - h) // 2
                start_x = (new_w - w) // 2
                writer.append_data(resized[start_y:start_y + h, start_x:start_x + w])

        print("✅ Simple video created successfully!")
        return temp_video_path

    except Exception as e:
        print(f"❌ Simple video creation also failed: {str(e)}")
        # Do not leave a partial or empty temp file behind on failure.
        if temp_video_path is not None:
            try:
                os.remove(temp_video_path)
            except OSError:
                pass
        return None
216
 
217
  def create_video(self, image, audio_data, sampling_rate, description, duration=30):
218
  """Create a video combining the image with the soundtrack"""
219
+ if not MOVIEPY_AVAILABLE:
220
+ print("⚠️ MoviePy not available - skipping video creation")
221
+ return None
222
+
223
  print("🎬 Creating video with visual effects...")
224
 
225
  # Create temporary files
 
274
  with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as video_file:
275
  output_path = video_file.name
276
 
277
+ # Write video file with more conservative settings for HF Spaces
278
  final_clip.write_videofile(
279
  output_path,
280
  fps=24,
281
  codec='libx264',
282
  audio_codec='aac',
283
+ bitrate='1000k',
284
  temp_audiofile='temp-audio.m4a',
285
  remove_temp=True,
286
  verbose=False,
287
+ logger=None,
288
+ ffmpeg_params=['-preset', 'ultrafast']
289
  )
290
 
291
  # Cleanup
 
331
  sf.write(audio_file.name, audio_data, sampling_rate)
332
  audio_path = audio_file.name
333
 
334
+ # Create video if MoviePy is available
335
+ video_path = None
336
+ if MOVIEPY_AVAILABLE:
337
+ progress(0.7, desc="Creating video...")
338
+ video_path = self.create_video(image, audio_data, sampling_rate, description, duration=30)
339
+
340
+ # If MoviePy video creation failed, try simple fallback
341
+ if video_path is None:
342
+ progress(0.8, desc="Trying simple video creation...")
343
+ video_path = self.create_simple_video_fallback(image, audio_data, sampling_rate, duration=30)
344
+ else:
345
+ progress(0.7, desc="Trying simple video creation...")
346
+ video_path = self.create_simple_video_fallback(image, audio_data, sampling_rate, duration=30)
347
 
348
  progress(1.0, desc="Complete!")
349
 
350
+ # Prepare status message
351
+ status_msg = f"**Image Description:** {description}\n\n**Music Style:** {music_prompt}"
352
+ if video_path is None:
353
+ status_msg += "\n\n⚠️ **Note:** Video generation failed, but audio was created successfully."
354
+ elif not MOVIEPY_AVAILABLE:
355
+ status_msg += "\n\n✅ **Note:** Video created using simple fallback method (MoviePy unavailable)."
356
+
357
  return (
358
  video_path,
359
+ status_msg,
360
  (sampling_rate, audio_data),
361
  audio_path
362
  )
363
 
364
  except Exception as e:
365
+ error_msg = f"Error: {str(e)}"
366
+ if not MOVIEPY_AVAILABLE:
367
+ error_msg += "\n\nNote: MoviePy is not available for video generation."
368
+ return None, error_msg, None, None
369
 
370
  # Initialize the generator
371
  print("🚀 Starting Photo-to-Video-Soundtrack Generator...")
 
394
  </div>
395
  """)
396
 
397
+ # Show status of video capabilities
398
+ if not MOVIEPY_AVAILABLE:
399
+ gr.HTML("""
400
+ <div style="background: #e3f2fd; border: 1px solid #90caf9; border-radius: 8px; padding: 15px; margin: 10px 0;">
401
+ <strong>ℹ️ Using Simple Video Mode</strong><br>
402
+ Advanced video effects unavailable, but basic video generation will still work!
403
+ </div>
404
+ """)
405
+
406
  with gr.Row():
407
  with gr.Column(scale=1):
408
  gr.Markdown("### 📤 Upload Your Photo")
 
494
  accelerate>=0.20.0
495
  scipy>=1.10.0
496
  soundfile>=0.12.0
497
+ gradio==4.44.0
 
498
  Pillow>=9.5.0
499
  numpy>=1.24.0
500
+ imageio>=2.31.1
501
+ imageio-ffmpeg>=0.4.8
502
+ moviepy==1.0.3
503
+ decorator>=4.4.2
504
+ proglog>=0.1.9
505
+ requests>=2.8.1
506
+ tqdm>=4.11.2
507
+ opencv-python-headless>=4.5.0
508
  """
509
 
510
  # README.md content (create this as a separate file):
511
  """
512
+ ---
513
+ title: AI Photo to Video Soundtrack Generator
514
+ emoji: 🎬
515
+ colorFrom: blue
516
+ colorTo: purple
517
+ sdk: gradio
518
+ sdk_version: "4.44.0"
519
+ app_file: app.py
520
+ pinned: false
521
+ license: apache-2.0
522
+ ---
523
+
524
  # 📸🎵 AI Photo to Video Soundtrack Generator
525
 
526
  Transform your photos into cinematic videos with AI-generated soundtracks!
 
556
  - **MoviePy**: For video creation and effects
557
  - **Gradio**: For the user interface
558
 
559
+ ## ⚙️ Configuration
560
+
561
+ This app requires:
562
+ - GPU acceleration for optimal performance
563
+ - Approximately 4GB VRAM
564
+ - Internet connection for model downloads
565
+
566
+ ## 🎯 Use Cases
567
+
568
+ Perfect for:
569
+ - Social media content creation
570
+ - Artistic projects
571
+ - Music visualization
572
+ - Creative storytelling
573
+ - Educational demonstrations
574
+
575
  Enjoy creating your AI-powered videos! 🎬✨
576
  """