Spaces:

citoreh
/

AxAva

Sleeping

App Files Files Community

citoreh committed on Jul 28, 2025

Commit

28fc925

verified ·

1 Parent(s): eab45d3

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -54

app.py CHANGED Viewed

@@ -7,10 +7,19 @@ import gradio as gr
 import tempfile
 import os
 import soundfile as sf
-from moviepy.editor import *
 import warnings
 warnings.filterwarnings('ignore')
 # Import required models
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from transformers import MusicgenForConditionalGeneration, AutoProcessor
@@ -157,60 +166,60 @@ class PhotoVideoSoundtrackGenerator:
         return audio_data, sampling_rate
-    def create_video_effects(self, image, duration=30):
-        """Create visual effects for the video"""
-        effects = []
-        # Convert PIL image to numpy array
-        img_array = np.array(image)
-        # Effect 1: Slow zoom in
-        def zoom_effect(get_frame, t):
-            frame = get_frame(t)
-            zoom_factor = 1 + (t / duration) * 0.3  # Zoom in by 30% over duration
-            h, w = frame.shape[:2]
-            new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
-            # Resize and center crop
-            resized = np.array(Image.fromarray(frame).resize((new_w, new_h), Image.Resampling.LANCZOS))
-            # Calculate crop coordinates
-            start_y = (new_h - h) // 2
-            start_x = (new_w - w) // 2
-            return resized[start_y:start_y+h, start_x:start_x+w]
-        # Effect 2: Subtle pan (Ken Burns effect)
-        def pan_effect(get_frame, t):
-            frame = get_frame(t)
-            h, w = frame.shape[:2]
-            # Calculate pan offset (subtle movement)
-            max_offset = min(w, h) * 0.05  # 5% of the smaller dimension
-            offset_x = int(max_offset * np.sin(2 * np.pi * t / duration))
-            offset_y = int(max_offset * 0.5 * np.cos(2 * np.pi * t / duration))
-            # Apply pan by cropping and padding
-            shifted = np.zeros_like(frame)
-            src_start_x = max(0, -offset_x)
-            src_end_x = min(w, w - offset_x)
-            src_start_y = max(0, -offset_y)
-            src_end_y = min(h, h - offset_y)
-            dst_start_x = max(0, offset_x)
-            dst_end_x = min(w, w + offset_x)
-            dst_start_y = max(0, offset_y)
-            dst_end_y = min(h, h + offset_y)
-            shifted[dst_start_y:dst_end_y, dst_start_x:dst_end_x] = frame[src_start_y:src_end_y, src_start_x:src_end_x]
-            return shifted
-        return [zoom_effect, pan_effect]
     def create_video(self, image, audio_data, sampling_rate, description, duration=30):
         """Create a video combining the image with the soundtrack"""
         print("🎬 Creating video with visual effects...")
         # Create temporary files
@@ -265,16 +274,18 @@ class PhotoVideoSoundtrackGenerator:
             with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as video_file:
                 output_path = video_file.name
-            # Write video file
             final_clip.write_videofile(
                 output_path,
                 fps=24,
                 codec='libx264',
                 audio_codec='aac',
                 temp_audiofile='temp-audio.m4a',
                 remove_temp=True,
                 verbose=False,
-                logger=None
             )
             # Cleanup
@@ -320,22 +331,41 @@ class PhotoVideoSoundtrackGenerator:
                 sf.write(audio_file.name, audio_data, sampling_rate)
                 audio_path = audio_file.name
-            progress(0.7, desc="Creating video...")
-            # Create video
-            video_path = self.create_video(image, audio_data, sampling_rate, description, duration=30)
             progress(1.0, desc="Complete!")
             return (
                 video_path,
-                f"**Image Description:** {description}\n\n**Music Style:** {music_prompt}",
                 (sampling_rate, audio_data),
                 audio_path
             )
         except Exception as e:
-            return None, f"Error: {str(e)}", None, None
 # Initialize the generator
 print("🚀 Starting Photo-to-Video-Soundtrack Generator...")
@@ -364,6 +394,15 @@ def create_interface():
         </div>
         """)
         with gr.Row():
             with gr.Column(scale=1):
                 gr.Markdown("### 📤 Upload Your Photo")
@@ -455,14 +494,33 @@ transformers>=4.30.0
 accelerate>=0.20.0
 scipy>=1.10.0
 soundfile>=0.12.0
-gradio>=4.0.0
-moviepy>=1.0.3
 Pillow>=9.5.0
 numpy>=1.24.0
 """
 # README.md content (create this as a separate file):
 """
 # 📸🎵 AI Photo to Video Soundtrack Generator
 Transform your photos into cinematic videos with AI-generated soundtracks!
@@ -498,5 +556,21 @@ The AI recognizes and creates appropriate music for:
 - **MoviePy**: For video creation and effects
 - **Gradio**: For the user interface
 Enjoy creating your AI-powered videos! 🎬✨
 """

 import tempfile
 import os
 import soundfile as sf
 import warnings
 warnings.filterwarnings('ignore')
+# Try to import MoviePy with fallback
+try:
+    from moviepy.editor import *
+    MOVIEPY_AVAILABLE = True
+    print("✅ MoviePy imported successfully")
+except ImportError as e:
+    print(f"⚠️ MoviePy import failed: {e}")
+    print("📹 Video generation will be disabled, but audio generation will still work")
+    MOVIEPY_AVAILABLE = False
 # Import required models
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from transformers import MusicgenForConditionalGeneration, AutoProcessor
         return audio_data, sampling_rate
+    def create_simple_video_fallback(self, image, audio_data, sampling_rate, duration=30):
+        """Create a silent slow-zoom video with imageio as a fallback for MoviePy.
+
+        Frames are streamed to the encoder one at a time instead of being
+        collected in a list, so memory use does not grow with the duration.
+
+        Args:
+            image: Source picture. A PIL image is normalised to RGB first;
+                anything else is passed through ``np.array`` unchanged.
+            audio_data: Unused. Accepted only so the signature matches
+                ``create_video``; this fallback does not mux audio into the file.
+            sampling_rate: Unused, see ``audio_data``.
+            duration: Clip length in seconds (24 frames are written per second).
+
+        Returns:
+            Path of the temporary ``.mp4`` file, or ``None`` if creation failed.
+        """
+        temp_video_path = None
+        try:
+            import imageio
+            print("🎬 Creating simple video using fallback method...")
+            fps = 24
+            total_frames = int(fps * duration)
+            if total_frames <= 0:
+                raise ValueError("duration must be positive")
+            # Palette, greyscale and alpha images do not encode reliably as video;
+            # force a plain 3-channel RGB picture before building frames.
+            if isinstance(image, Image.Image):
+                image = image.convert("RGB")
+            # Build the PIL source once; it is identical for every frame.
+            base_image = Image.fromarray(np.array(image))
+            w, h = base_image.size
+            # Reserve a path that outlives this function; the caller owns the file.
+            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
+                temp_video_path = temp_video.name
+            with imageio.get_writer(temp_video_path, fps=fps) as writer:
+                for i in range(total_frames):
+                    # Zoom in linearly by up to 20% over the whole clip.
+                    zoom_factor = 1.0 + 0.2 * (i / total_frames)
+                    new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
+                    zoomed = base_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
+                    # Center crop back to the original size.
+                    start_y = (new_h - h) // 2
+                    start_x = (new_w - w) // 2
+                    frame = zoomed.crop((start_x, start_y, start_x + w, start_y + h))
+                    writer.append_data(np.array(frame))
+            print("✅ Simple video created successfully!")
+            return temp_video_path
+        except Exception as e:
+            print(f"❌ Simple video creation also failed: {str(e)}")
+            # Do not leave a half-written temp file behind on failure.
+            if temp_video_path and os.path.exists(temp_video_path):
+                try:
+                    os.remove(temp_video_path)
+                except OSError:
+                    pass
+            return None
     def create_video(self, image, audio_data, sampling_rate, description, duration=30):
         """Create a video combining the image with the soundtrack"""
+        if not MOVIEPY_AVAILABLE:
+            print("⚠️ MoviePy not available - skipping video creation")
+            return None
         print("🎬 Creating video with visual effects...")
         # Create temporary files
             with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as video_file:
                 output_path = video_file.name
+            # Write video file with more conservative settings for HF Spaces
             final_clip.write_videofile(
                 output_path,
                 fps=24,
                 codec='libx264',
                 audio_codec='aac',
+                bitrate='1000k',
                 temp_audiofile='temp-audio.m4a',
                 remove_temp=True,
                 verbose=False,
+                logger=None,
+                ffmpeg_params=['-preset', 'ultrafast']
             )
             # Cleanup
                 sf.write(audio_file.name, audio_data, sampling_rate)
                 audio_path = audio_file.name
+            # Create video if MoviePy is available
+            video_path = None
+            if MOVIEPY_AVAILABLE:
+                progress(0.7, desc="Creating video...")
+                video_path = self.create_video(image, audio_data, sampling_rate, description, duration=30)
+                # If MoviePy video creation failed, try simple fallback
+                if video_path is None:
+                    progress(0.8, desc="Trying simple video creation...")
+                    video_path = self.create_simple_video_fallback(image, audio_data, sampling_rate, duration=30)
+            else:
+                progress(0.7, desc="Trying simple video creation...")
+                video_path = self.create_simple_video_fallback(image, audio_data, sampling_rate, duration=30)
             progress(1.0, desc="Complete!")
+            # Prepare status message
+            status_msg = f"**Image Description:** {description}\n\n**Music Style:** {music_prompt}"
+            if video_path is None:
+                status_msg += "\n\n⚠️ **Note:** Video generation failed, but audio was created successfully."
+            elif not MOVIEPY_AVAILABLE:
+                status_msg += "\n\n✅ **Note:** Video created using simple fallback method (MoviePy unavailable)."
             return (
                 video_path,
+                status_msg,
                 (sampling_rate, audio_data),
                 audio_path
             )
         except Exception as e:
+            error_msg = f"Error: {str(e)}"
+            if not MOVIEPY_AVAILABLE:
+                error_msg += "\n\nNote: MoviePy is not available for video generation."
+            return None, error_msg, None, None
 # Initialize the generator
 print("🚀 Starting Photo-to-Video-Soundtrack Generator...")
         </div>
         """)
+        # Show status of video capabilities
+        if not MOVIEPY_AVAILABLE:
+            gr.HTML("""
+            <div style="background: #e3f2fd; border: 1px solid #90caf9; border-radius: 8px; padding: 15px; margin: 10px 0;">
+                <strong>ℹ️ Using Simple Video Mode</strong><br>
+                Advanced video effects unavailable, but basic video generation will still work!
+            </div>
+            """)
         with gr.Row():
             with gr.Column(scale=1):
                 gr.Markdown("### 📤 Upload Your Photo")
 accelerate>=0.20.0
 scipy>=1.10.0
 soundfile>=0.12.0
+gradio==4.44.0
 Pillow>=9.5.0
 numpy>=1.24.0
+imageio>=2.31.1
+imageio-ffmpeg>=0.4.8
+moviepy==1.0.3
+decorator>=4.4.2
+proglog>=0.1.9
+requests>=2.8.1
+tqdm>=4.11.2
+opencv-python-headless>=4.5.0
 """
 # README.md content (create this as a separate file):
 """
+---
+title: AI Photo to Video Soundtrack Generator
+emoji: 🎬
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+sdk_version: "4.44.0"
+app_file: app.py
+pinned: false
+license: apache-2.0
+---
 # 📸🎵 AI Photo to Video Soundtrack Generator
 Transform your photos into cinematic videos with AI-generated soundtracks!
 - **MoviePy**: For video creation and effects
 - **Gradio**: For the user interface
+## ⚙️ Configuration
+This app requires:
+- GPU acceleration for optimal performance
+- Approximately 4GB VRAM
+- Internet connection for model downloads
+## 🎯 Use Cases
+Perfect for:
+- Social media content creation
+- Artistic projects
+- Music visualization
+- Creative storytelling
+- Educational demonstrations
 Enjoy creating your AI-powered videos! 🎬✨
 """