Update app.py
Browse files
app.py
CHANGED
|
@@ -7,10 +7,19 @@ import gradio as gr
|
|
| 7 |
import tempfile
|
| 8 |
import os
|
| 9 |
import soundfile as sf
|
| 10 |
-
from moviepy.editor import *
|
| 11 |
import warnings
|
| 12 |
warnings.filterwarnings('ignore')
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Import required models
|
| 15 |
from transformers import BlipProcessor, BlipForConditionalGeneration
|
| 16 |
from transformers import MusicgenForConditionalGeneration, AutoProcessor
|
|
@@ -157,60 +166,60 @@ class PhotoVideoSoundtrackGenerator:
|
|
| 157 |
|
| 158 |
return audio_data, sampling_rate
|
| 159 |
|
| 160 |
-
def
|
| 161 |
-
"""Create
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
# Convert PIL image to numpy array
|
| 165 |
-
img_array = np.array(image)
|
| 166 |
-
|
| 167 |
-
# Effect 1: Slow zoom in
|
| 168 |
-
def zoom_effect(get_frame, t):
|
| 169 |
-
frame = get_frame(t)
|
| 170 |
-
zoom_factor = 1 + (t / duration) * 0.3 # Zoom in by 30% over duration
|
| 171 |
-
h, w = frame.shape[:2]
|
| 172 |
-
new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
|
| 173 |
-
|
| 174 |
-
# Resize and center crop
|
| 175 |
-
resized = np.array(Image.fromarray(frame).resize((new_w, new_h), Image.Resampling.LANCZOS))
|
| 176 |
|
| 177 |
-
|
| 178 |
-
start_y = (new_h - h) // 2
|
| 179 |
-
start_x = (new_w - w) // 2
|
| 180 |
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
frame = get_frame(t)
|
| 186 |
-
h, w = frame.shape[:2]
|
| 187 |
|
| 188 |
-
#
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
offset_y = int(max_offset * 0.5 * np.cos(2 * np.pi * t / duration))
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
src_end_y = min(h, h - offset_y)
|
| 200 |
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
dst_start_y = max(0, offset_y)
|
| 204 |
-
dst_end_y = min(h, h + offset_y)
|
| 205 |
|
| 206 |
-
|
|
|
|
|
|
|
| 207 |
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
|
| 212 |
def create_video(self, image, audio_data, sampling_rate, description, duration=30):
|
| 213 |
"""Create a video combining the image with the soundtrack"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
print("🎬 Creating video with visual effects...")
|
| 215 |
|
| 216 |
# Create temporary files
|
|
@@ -265,16 +274,18 @@ class PhotoVideoSoundtrackGenerator:
|
|
| 265 |
with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as video_file:
|
| 266 |
output_path = video_file.name
|
| 267 |
|
| 268 |
-
# Write video file
|
| 269 |
final_clip.write_videofile(
|
| 270 |
output_path,
|
| 271 |
fps=24,
|
| 272 |
codec='libx264',
|
| 273 |
audio_codec='aac',
|
|
|
|
| 274 |
temp_audiofile='temp-audio.m4a',
|
| 275 |
remove_temp=True,
|
| 276 |
verbose=False,
|
| 277 |
-
logger=None
|
|
|
|
| 278 |
)
|
| 279 |
|
| 280 |
# Cleanup
|
|
@@ -320,22 +331,41 @@ class PhotoVideoSoundtrackGenerator:
|
|
| 320 |
sf.write(audio_file.name, audio_data, sampling_rate)
|
| 321 |
audio_path = audio_file.name
|
| 322 |
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
progress(1.0, desc="Complete!")
|
| 329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
return (
|
| 331 |
video_path,
|
| 332 |
-
|
| 333 |
(sampling_rate, audio_data),
|
| 334 |
audio_path
|
| 335 |
)
|
| 336 |
|
| 337 |
except Exception as e:
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
# Initialize the generator
|
| 341 |
print("🚀 Starting Photo-to-Video-Soundtrack Generator...")
|
|
@@ -364,6 +394,15 @@ def create_interface():
|
|
| 364 |
</div>
|
| 365 |
""")
|
| 366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
with gr.Row():
|
| 368 |
with gr.Column(scale=1):
|
| 369 |
gr.Markdown("### 📤 Upload Your Photo")
|
|
@@ -455,14 +494,33 @@ transformers>=4.30.0
|
|
| 455 |
accelerate>=0.20.0
|
| 456 |
scipy>=1.10.0
|
| 457 |
soundfile>=0.12.0
|
| 458 |
-
gradio
|
| 459 |
-
moviepy>=1.0.3
|
| 460 |
Pillow>=9.5.0
|
| 461 |
numpy>=1.24.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
"""
|
| 463 |
|
| 464 |
# README.md content (create this as a separate file):
|
| 465 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
# 📸🎵 AI Photo to Video Soundtrack Generator
|
| 467 |
|
| 468 |
Transform your photos into cinematic videos with AI-generated soundtracks!
|
|
@@ -498,5 +556,21 @@ The AI recognizes and creates appropriate music for:
|
|
| 498 |
- **MoviePy**: For video creation and effects
|
| 499 |
- **Gradio**: For the user interface
|
| 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
Enjoy creating your AI-powered videos! 🎬✨
|
| 502 |
"""
|
|
|
|
| 7 |
import tempfile
|
| 8 |
import os
|
| 9 |
import soundfile as sf
|
|
|
|
| 10 |
import warnings
|
| 11 |
warnings.filterwarnings('ignore')
|
| 12 |
|
| 13 |
+
# Try to import MoviePy, falling back to the simple imageio-based video path.
# The guard catches Exception rather than only ImportError: importing
# moviepy.editor can also fail for environment reasons (for example when its
# ffmpeg binary lookup fails), and any such failure should switch the app to
# the fallback instead of crashing at startup.
try:
    from moviepy.editor import *
    MOVIEPY_AVAILABLE = True
    print("✅ MoviePy imported successfully")
except Exception as e:
    # MOVIEPY_AVAILABLE is read elsewhere in this file to decide which
    # video path to use.
    MOVIEPY_AVAILABLE = False
    print(f"⚠️ MoviePy import failed: {e}")
    print("📹 Advanced video effects will be disabled; simple video fallback and audio generation will still work")
|
| 22 |
+
|
| 23 |
# Import required models
|
| 24 |
from transformers import BlipProcessor, BlipForConditionalGeneration
|
| 25 |
from transformers import MusicgenForConditionalGeneration, AutoProcessor
|
|
|
|
| 166 |
|
| 167 |
return audio_data, sampling_rate
|
| 168 |
|
| 169 |
+
def create_simple_video_fallback(self, image, audio_data, sampling_rate, duration=30):
    """Create a simple zoom-in video using imageio as fallback when MoviePy fails.

    Frames are rendered one at a time and streamed straight to the encoder,
    so memory use stays at roughly one frame instead of the whole clip.

    Args:
        image: Source picture; a PIL image, or anything ``np.asarray`` can
            turn into an image array.
        audio_data: Unused here. The fallback video has no audio track; the
            soundtrack is saved separately by the caller.
        sampling_rate: Unused here, kept for signature parity with
            ``create_video``.
        duration: Clip length in seconds (may be fractional).

    Returns:
        Path of the written ``.mp4`` temp file, or ``None`` if anything
        went wrong (this is a best-effort fallback and never raises).
    """
    fps = 24
    max_zoom = 0.2  # Zoom in by 20% over the whole clip
    temp_video_path = None

    try:
        import imageio

        print("🎬 Creating simple video using fallback method...")

        # range() needs an int; round so fractional durations work too
        total_frames = int(round(fps * duration))
        if total_frames <= 0:
            raise ValueError(f"duration must be positive, got {duration!r}")

        # Build the base image once and normalise to RGB so palette,
        # grayscale and RGBA inputs all encode as ordinary colour frames
        if isinstance(image, Image.Image):
            base_img = image
        else:
            base_img = Image.fromarray(np.asarray(image))
        base_img = base_img.convert("RGB")
        w, h = base_img.size

        # Reserve a temporary output path (file is kept for the caller)
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
            temp_video_path = temp_video.name

        # Stream frames to the writer instead of collecting them in a list
        with imageio.get_writer(temp_video_path, mode="I", fps=fps) as writer:
            for i in range(total_frames):
                zoom_factor = 1.0 + max_zoom * (i / total_frames)

                # Resize image
                new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
                resized = base_img.resize((new_w, new_h), Image.Resampling.LANCZOS)

                # Center crop back to original size
                start_y = (new_h - h) // 2
                start_x = (new_w - w) // 2
                frame = resized.crop((start_x, start_y, start_x + w, start_y + h))

                writer.append_data(np.asarray(frame))

        print("✅ Simple video created successfully!")
        return temp_video_path

    except Exception as e:
        print(f"❌ Simple video creation also failed: {str(e)}")
        # Do not leave an empty or partial temp file behind on failure
        if temp_video_path is not None:
            try:
                os.remove(temp_video_path)
            except OSError:
                pass
        return None
|
| 216 |
|
| 217 |
def create_video(self, image, audio_data, sampling_rate, description, duration=30):
|
| 218 |
"""Create a video combining the image with the soundtrack"""
|
| 219 |
+
if not MOVIEPY_AVAILABLE:
|
| 220 |
+
print("⚠️ MoviePy not available - skipping video creation")
|
| 221 |
+
return None
|
| 222 |
+
|
| 223 |
print("🎬 Creating video with visual effects...")
|
| 224 |
|
| 225 |
# Create temporary files
|
|
|
|
| 274 |
with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as video_file:
|
| 275 |
output_path = video_file.name
|
| 276 |
|
| 277 |
+
# Write video file with more conservative settings for HF Spaces
|
| 278 |
final_clip.write_videofile(
|
| 279 |
output_path,
|
| 280 |
fps=24,
|
| 281 |
codec='libx264',
|
| 282 |
audio_codec='aac',
|
| 283 |
+
bitrate='1000k',
|
| 284 |
temp_audiofile='temp-audio.m4a',
|
| 285 |
remove_temp=True,
|
| 286 |
verbose=False,
|
| 287 |
+
logger=None,
|
| 288 |
+
ffmpeg_params=['-preset', 'ultrafast']
|
| 289 |
)
|
| 290 |
|
| 291 |
# Cleanup
|
|
|
|
| 331 |
sf.write(audio_file.name, audio_data, sampling_rate)
|
| 332 |
audio_path = audio_file.name
|
| 333 |
|
| 334 |
+
# Create video if MoviePy is available
|
| 335 |
+
video_path = None
|
| 336 |
+
if MOVIEPY_AVAILABLE:
|
| 337 |
+
progress(0.7, desc="Creating video...")
|
| 338 |
+
video_path = self.create_video(image, audio_data, sampling_rate, description, duration=30)
|
| 339 |
+
|
| 340 |
+
# If MoviePy video creation failed, try simple fallback
|
| 341 |
+
if video_path is None:
|
| 342 |
+
progress(0.8, desc="Trying simple video creation...")
|
| 343 |
+
video_path = self.create_simple_video_fallback(image, audio_data, sampling_rate, duration=30)
|
| 344 |
+
else:
|
| 345 |
+
progress(0.7, desc="Trying simple video creation...")
|
| 346 |
+
video_path = self.create_simple_video_fallback(image, audio_data, sampling_rate, duration=30)
|
| 347 |
|
| 348 |
progress(1.0, desc="Complete!")
|
| 349 |
|
| 350 |
+
# Prepare status message
|
| 351 |
+
status_msg = f"**Image Description:** {description}\n\n**Music Style:** {music_prompt}"
|
| 352 |
+
if video_path is None:
|
| 353 |
+
status_msg += "\n\n⚠️ **Note:** Video generation failed, but audio was created successfully."
|
| 354 |
+
elif not MOVIEPY_AVAILABLE:
|
| 355 |
+
status_msg += "\n\n✅ **Note:** Video created using simple fallback method (MoviePy unavailable)."
|
| 356 |
+
|
| 357 |
return (
|
| 358 |
video_path,
|
| 359 |
+
status_msg,
|
| 360 |
(sampling_rate, audio_data),
|
| 361 |
audio_path
|
| 362 |
)
|
| 363 |
|
| 364 |
except Exception as e:
|
| 365 |
+
error_msg = f"Error: {str(e)}"
|
| 366 |
+
if not MOVIEPY_AVAILABLE:
|
| 367 |
+
error_msg += "\n\nNote: MoviePy is not available for video generation."
|
| 368 |
+
return None, error_msg, None, None
|
| 369 |
|
| 370 |
# Initialize the generator
|
| 371 |
print("🚀 Starting Photo-to-Video-Soundtrack Generator...")
|
|
|
|
| 394 |
</div>
|
| 395 |
""")
|
| 396 |
|
| 397 |
+
# Show status of video capabilities
|
| 398 |
+
if not MOVIEPY_AVAILABLE:
|
| 399 |
+
gr.HTML("""
|
| 400 |
+
<div style="background: #e3f2fd; border: 1px solid #90caf9; border-radius: 8px; padding: 15px; margin: 10px 0;">
|
| 401 |
+
<strong>ℹ️ Using Simple Video Mode</strong><br>
|
| 402 |
+
Advanced video effects unavailable, but basic video generation will still work!
|
| 403 |
+
</div>
|
| 404 |
+
""")
|
| 405 |
+
|
| 406 |
with gr.Row():
|
| 407 |
with gr.Column(scale=1):
|
| 408 |
gr.Markdown("### 📤 Upload Your Photo")
|
|
|
|
| 494 |
accelerate>=0.20.0
|
| 495 |
scipy>=1.10.0
|
| 496 |
soundfile>=0.12.0
|
| 497 |
+
gradio==4.44.0
|
|
|
|
| 498 |
Pillow>=9.5.0
|
| 499 |
numpy>=1.24.0
|
| 500 |
+
imageio>=2.31.1
|
| 501 |
+
imageio-ffmpeg>=0.4.8
|
| 502 |
+
moviepy==1.0.3
|
| 503 |
+
decorator>=4.4.2
|
| 504 |
+
proglog>=0.1.9
|
| 505 |
+
requests>=2.8.1
|
| 506 |
+
tqdm>=4.11.2
|
| 507 |
+
opencv-python-headless>=4.5.0
|
| 508 |
"""
|
| 509 |
|
| 510 |
# README.md content (create this as a separate file):
|
| 511 |
"""
|
| 512 |
+
---
|
| 513 |
+
title: AI Photo to Video Soundtrack Generator
|
| 514 |
+
emoji: 🎬
|
| 515 |
+
colorFrom: blue
|
| 516 |
+
colorTo: purple
|
| 517 |
+
sdk: gradio
|
| 518 |
+
sdk_version: "4.44.0"
|
| 519 |
+
app_file: app.py
|
| 520 |
+
pinned: false
|
| 521 |
+
license: apache-2.0
|
| 522 |
+
---
|
| 523 |
+
|
| 524 |
# 📸🎵 AI Photo to Video Soundtrack Generator
|
| 525 |
|
| 526 |
Transform your photos into cinematic videos with AI-generated soundtracks!
|
|
|
|
| 556 |
- **MoviePy**: For video creation and effects
|
| 557 |
- **Gradio**: For the user interface
|
| 558 |
|
| 559 |
+
## ⚙️ Configuration
|
| 560 |
+
|
| 561 |
+
This app requires:
|
| 562 |
+
- GPU acceleration for optimal performance
|
| 563 |
+
- Approximately 4GB VRAM
|
| 564 |
+
- Internet connection for model downloads
|
| 565 |
+
|
| 566 |
+
## 🎯 Use Cases
|
| 567 |
+
|
| 568 |
+
Perfect for:
|
| 569 |
+
- Social media content creation
|
| 570 |
+
- Artistic projects
|
| 571 |
+
- Music visualization
|
| 572 |
+
- Creative storytelling
|
| 573 |
+
- Educational demonstrations
|
| 574 |
+
|
| 575 |
Enjoy creating your AI-powered videos! 🎬✨
|
| 576 |
"""
|