Spaces:

WAQASCHANNA
/

Presentation_Slides_VoiceOver_Maker

Running

App Files Files Community

WAQASCHANNA committed on Feb 24, 2025

Commit

895252e

verified ·

1 Parent(s): d8392ee

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -84

app.py CHANGED Viewed

@@ -10,108 +10,220 @@ from pydub.exceptions import CouldntDecodeError
 tempfile.tempdir = "/tmp"
 # ==================================================================
-# Core Functions (No changes needed here)
 # ==================================================================
-# [Keep all the core functions exactly as in previous version]
-# ... [text_to_speech, add_background_music, create_video functions] ...
 # ==================================================================
-# Streamlit UI - Fixed Layout Version
 # ==================================================================
 st.title("PNG Slides to Video Maker 🖼️➡️🎥")
 st.markdown("Upload PNG slides, add scripts, and generate a video!")
-# --- File Upload Section ---
 uploaded_images = st.file_uploader(
-    "Step 1: Upload PNG Slides",
-    type=["png"],
     accept_multiple_files=True,
     key="main_uploader"
 )
-# --- Only show other controls if images are uploaded ---
-if uploaded_images:
-    # --- Slide Ordering ---
-    st.subheader("Step 2: Arrange Slide Order")
-    filenames = [img.name for img in uploaded_images]
-    st.session_state.slide_order = st.multiselect(
-        "Drag to reorder slides:",
-        filenames,
-        default=filenames,
-        key="sort_slides"
     )
-    uploaded_images = [img for name in st.session_state.slide_order
-                      for img in uploaded_images if img.name == name]
-    # --- Settings Section ---
-    st.subheader("Step 3: Video Settings")
-    col1, col2 = st.columns(2)
-    with col1:
-        transition_delay = st.slider(
-            "Transition Delay (seconds)",
-            min_value=0,
-            max_value=5,
-            value=2,
-            help="Pause between slides after audio finishes"
         )
-    with col2:
-        gender = st.selectbox(
-            "Voice Gender",
-            options=['female', 'male'],
-            help="Gender selection available for supported languages"
-        )
-    lang = st.selectbox(
-        "Voice Language",
-        ['en', 'es', 'fr', 'de', 'ja', 'zh-CN', 'hi'],
-        index=0
-    )
-    # --- Script Input Section ---
-    st.subheader("Step 4: Add Scripts")
-    slide_texts = []
-    with st.expander(f"Scripts for {len(uploaded_images)} Slides"):
-        for i, img in enumerate(uploaded_images):
-            text = st.text_area(
-                f"Slide {i+1} Text",
-                key=f"slide_{i}",
-                placeholder="Enter text for this slide..."
             )
-            slide_texts.append(text.strip())
-    # --- Background Music Section ---
-    st.subheader("Optional: Background Music")
-    uploaded_music = st.file_uploader(
-        "Upload background music (MP3)",
-        type=["mp3"],
-        key="music_uploader"
-    )
-    music_volume = st.slider(
-        "Music Volume Reduction (dB)",
-        0, 30, 25,
-        help="Higher values make music quieter"
-    ) if uploaded_music else 0
-    # --- Generate Button ---
-    st.subheader("Step 5: Generate Video")
-    if st.button("🚀 Generate Video", use_container_width=True):
-        # Validation checks
-        if len(slide_texts) != len(uploaded_images):
-            st.error("Number of scripts doesn't match number of slides!")
-            st.stop()
-        if any(not text for text in slide_texts):
-            st.error("All slides must have non-empty text!")
-            st.stop()
-        with st.spinner("Creating your masterpiece..."):
-            try:
-                # [Keep processing code from previous version]
-                # ... [processing logic here] ...
-            except Exception as e:
-                st.error(f"Error: {str(e)}")
-else:
-    st.info("ℹ️ Please upload PNG slides to begin")

 tempfile.tempdir = "/tmp"
 # ==================================================================
+# Core Functions
 # ==================================================================
+def text_to_speech(slide_texts, lang='en', gender='female', transition_delay=0):
+    """Synthesize a single voice-over track covering every slide.
+
+    Each slide's text is rendered with gTTS, followed by
+    ``transition_delay`` seconds of silence, and the per-slide clips are
+    concatenated into one MP3 file.
+
+    Args:
+        slide_texts: Script strings, one per slide, in playback order.
+        lang: Language code passed to gTTS (e.g. ``'en'``).
+        gender: ``'female'`` or ``'male'``. It only selects the ``tld``
+            passed to gTTS from the table below; an unknown gender or a
+            language missing from the table falls back to ``'com'``.
+        transition_delay: Seconds of silence appended after each clip.
+
+    Returns:
+        ``(durations, path)``: the length of each slide's clip in
+        milliseconds (silence included) and the path of the combined MP3.
+        The file is not deleted here; the caller owns it.
+
+    Raises:
+        ValueError: If ``slide_texts`` is empty.
+    """
+    if not slide_texts:
+        # sum([]) below would yield the int 0, which has no .export().
+        raise ValueError("At least one slide text is required")
+    # Voice configuration mapping
+    # NOTE(review): gTTS documents tld as choosing a regional accent, not a
+    # speaker gender, so 'male' is presumably a best-effort approximation -
+    # confirm against the gTTS documentation.
+    tld_map = {
+        'female': {'en': 'com', 'es': 'es', 'fr': 'fr', 'de': 'de', 'ja': 'co.jp'},
+        'male': {'en': 'com.au', 'es': 'com.mx', 'fr': 'ca', 'de': 'de', 'ja': 'co.jp'}
+    }
+    # .get() at both levels so an unsupported gender or language degrades to
+    # the default domain instead of raising KeyError.
+    tld = tld_map.get(gender, {}).get(lang, 'com')
+    # The same pause follows every clip, so build it once.
+    silence = AudioSegment.silent(duration=transition_delay * 1000)
+    audio_clips = []
+    for text in slide_texts:
+        # Reserve a path and close the handle before gTTS writes to it.
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
+            clip_path = fp.name
+        try:
+            gTTS(text=text, lang=lang, tld=tld, slow=False).save(clip_path)
+            audio_clips.append(AudioSegment.from_mp3(clip_path) + silence)
+        finally:
+            # The decoded clip lives in memory; the per-slide file is scratch.
+            os.unlink(clip_path)
+    durations = [len(clip) for clip in audio_clips]
+    combined_audio = sum(audio_clips)
+    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
+        output_path = fp.name
+    combined_audio.export(output_path, format="mp3")
+    return durations, output_path
+def add_background_music(voice_path, music_path, volume_reduction=25):
+    """Overlay optional background music on the voice-over track.
+
+    Args:
+        voice_path: Path of the voice-over MP3.
+        music_path: Path of the music file, or a falsy value to skip mixing.
+        volume_reduction: Decibels subtracted from the music before mixing.
+
+    Returns:
+        ``(duration_seconds, path)``: length of the final audio in seconds
+        and the path of the exported MP3. The caller owns that file.
+
+    Raises:
+        ValueError: If the music file cannot be decoded; the original
+            ``CouldntDecodeError`` is attached as ``__cause__``.
+    """
+    voice = AudioSegment.from_mp3(voice_path)
+    final_audio = voice
+    if music_path:
+        # Only the decode step raises CouldntDecodeError, so keep the try
+        # narrow and chain the cause for debugging.
+        try:
+            music = AudioSegment.from_file(music_path)
+        except CouldntDecodeError as err:
+            raise ValueError("Invalid music file format") from err
+        # Trim to the voice length, fade out over 2 s, then attenuate.
+        music = music[:len(voice)].fade_out(2000) - volume_reduction
+        final_audio = voice.overlay(music)
+    # Reserve a path and close the handle before exporting to it.
+    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
+        output_path = fp.name
+    final_audio.export(output_path, format="mp3")
+    return len(final_audio) / 1000, output_path
+def create_video(img_paths, durations, audio_path):
+    """Render the slides as an MP4 whose timing follows the audio.
+
+    Args:
+        img_paths: Image file paths in playback order.
+        durations: On-screen time of each slide in milliseconds, parallel
+            to ``img_paths``.
+        audio_path: Audio file attached as the soundtrack.
+
+    Returns:
+        Path of the written MP4. The file is not deleted here; the caller
+        owns it.
+
+    Raises:
+        ValueError: If there are no slides, or ``img_paths`` and
+            ``durations`` differ in length.
+    """
+    if not img_paths:
+        raise ValueError("At least one slide image is required")
+    if len(img_paths) != len(durations):
+        # zip() would silently drop the unmatched tail and desync the video.
+        raise ValueError("Number of slide images doesn't match number of durations")
+    clips = [
+        ImageClip(img_path).set_duration(duration / 1000)
+        for img_path, duration in zip(img_paths, durations)
+    ]
+    audio = AudioFileClip(audio_path)
+    try:
+        video = concatenate_videoclips(clips, method="compose").set_audio(audio)
+        # Reserve a path and close the handle before the encoder writes to it.
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as fp:
+            output_path = fp.name
+        video.write_videofile(output_path, fps=24, threads=4)
+        return output_path
+    finally:
+        # Release the audio reader even when rendering fails.
+        audio.close()
 # ==================================================================
+# Streamlit UI
 # ==================================================================
 st.title("PNG Slides to Video Maker 🖼️➡️🎥")
 st.markdown("Upload PNG slides, add scripts, and generate a video!")
+# Step 1 - main uploader: one widget that accepts several PNG files.
 uploaded_images = st.file_uploader(
+    "Step 1: Upload PNG Slides",
+    type=["png"],
     accept_multiple_files=True,
     key="main_uploader"
 )
+# Guard clause: with no slides, show a hint and stop this script run so the
+# remaining steps are not executed.
+if not uploaded_images:
+    st.info("ℹ️ Please upload PNG slides to begin")
+    st.stop()
+# Step 2 - slide ordering: the multiselect result is stored in session state
+# and then used to rebuild uploaded_images, so a name that is deselected is
+# left out of the video.
+# NOTE(review): matching is by file name only; if two uploads share a name,
+# each occurrence would match both files and duplicate slides - confirm.
+st.subheader("Step 2: Arrange Slide Order")
+filenames = [img.name for img in uploaded_images]
+st.session_state.slide_order = st.multiselect(
+    "Drag to reorder slides:",
+    filenames,
+    default=filenames,
+    key="sort_slides"
+)
+uploaded_images = [img for name in st.session_state.slide_order
+                  for img in uploaded_images if img.name == name]
+# Step 3 - video settings: pause length and voice gender side by side, with
+# the language selector below them.
+st.subheader("Step 3: Video Settings")
+col1, col2 = st.columns(2)
+with col1:
+    transition_delay = st.slider(
+        "Transition Delay (seconds)",
+        min_value=0,
+        max_value=5,
+        value=2,
+        help="Silence between slides after voice finishes"
+    )
+with col2:
+    gender = st.selectbox(
+        "Voice Gender",
+        options=['female', 'male'],
+        help="Gender selection for supported languages"
     )
+# 'zh-CN' and 'hi' have no entry in text_to_speech's tld table, so they use
+# that function's default domain.
+lang = st.selectbox(
+    "Voice Language",
+    ['en', 'es', 'fr', 'de', 'ja', 'zh-CN', 'hi'],
+    index=0
+)
+# Step 4 - script input: one text area per slide; the stripped text is
+# collected in slide order.
+# NOTE(review): widget keys are positional (slide_0, slide_1, ...), so after
+# a reorder each script presumably stays with its position rather than its
+# image - confirm.
+st.subheader("Step 4: Add Scripts")
+slide_texts = []
+with st.expander(f"Scripts for {len(uploaded_images)} Slides", expanded=True):
+    for i, img in enumerate(uploaded_images):
+        text = st.text_area(
+            f"Slide {i+1} Text",
+            key=f"slide_{i}",
+            placeholder="Enter text for this slide...",
+            height=100
         )
+        slide_texts.append(text.strip())
+# Step 5 - music settings: the volume slider is only created once a music
+# file has been uploaded; otherwise music_volume is 0.
+st.subheader("Step 5: Background Music (Optional)")
+uploaded_music = st.file_uploader(
+    "Upload MP3 file",
+    type=["mp3"],
+    key="music_uploader"
+)
+music_volume = st.slider(
+    "Music Volume Reduction (dB)",
+    0, 30, 25,
+    help="Higher values make background music quieter"
+) if uploaded_music else 0
+# Step 6 - heading for the generate button that follows.
+st.subheader("Step 6: Generate Video")
+# Runs the whole pipeline when the button is pressed: validate the scripts,
+# write the uploads to temp files, synthesize and mix the audio, render the
+# video, show it, and remove every temp file afterwards.
+if st.button("🚀 Generate Video", use_container_width=True, type="primary"):
+    # Validation
+    if len(slide_texts) != len(uploaded_images):
+        st.error("Number of scripts doesn't match number of slides!")
+        st.stop()
+    if any(not text for text in slide_texts):
+        st.error("All slides must have non-empty text!")
+        st.stop()
+    with st.spinner("Creating your video... This may take a minute ⏳"):
+        # Every temp path is recorded as soon as it exists so the finally
+        # block removes it whether generation succeeds or fails.
+        cleanup_files = []
+        try:
+            # 1. Save images to temp files
+            img_paths = []
+            for img in uploaded_images:
+                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
+                    cleanup_files.append(f.name)
+                    f.write(img.getbuffer())
+                    img_paths.append(f.name)
+            # 2. Generate voiceover with delays
+            durations, voice_path = text_to_speech(
+                slide_texts,
+                lang,
+                gender,
+                transition_delay
             )
+            cleanup_files.append(voice_path)
+            # 3. Process background music
+            music_path = None
+            if uploaded_music:
+                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
+                    cleanup_files.append(f.name)
+                    f.write(uploaded_music.getbuffer())
+                    music_path = f.name
+            # The returned duration is not needed here; only the path is.
+            _, final_audio_path = add_background_music(
+                voice_path,
+                music_path,
+                music_volume
+            )
+            cleanup_files.append(final_audio_path)
+            # 4. Create video
+            video_path = create_video(img_paths, durations, final_audio_path)
+            cleanup_files.append(video_path)
+            # 5. Display result
+            st.success("✅ Video Ready! Play it below")
+            # Hand Streamlit the bytes rather than the path, because the
+            # file itself is deleted in the finally block below.
+            with open(video_path, "rb") as video_file:
+                st.video(video_file.read())
+        except ValueError as e:
+            st.error(f"Audio Error: {str(e)}")
+        except Exception as e:
+            st.error(f"Processing Error: {str(e)}")
+        finally:
+            # 6. Cleanup - also runs on failure so /tmp does not fill up.
+            for path in cleanup_files:
+                if os.path.exists(path):
+                    os.unlink(path)