# Source header from the Hugging Face Spaces file viewer (not code):
# uploaded by WAQASCHANNA — "Update app.py" — commit 2b0ac50 (verified)
import os
import tempfile
import streamlit as st
from gtts import gTTS
from pydub import AudioSegment
from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip
from pydub.exceptions import CouldntDecodeError
# Hugging Face Spaces configuration
# Spaces guarantees write access only under /tmp, so route every
# tempfile.NamedTemporaryFile created below to that directory.
tempfile.tempdir = "/tmp"
# ==================================================================
# Core Functions (Updated with working gender voices)
# ==================================================================
def text_to_speech(slide_texts, lang='en', gender='female', transition_delay=0):
    """Convert each slide's text to speech and concatenate into one MP3.

    Args:
        slide_texts: list of non-empty strings, one per slide.
        lang: gTTS language code (e.g. 'en', 'es', 'zh-CN').
        gender: 'female' or 'male' — approximated by choosing a regional
            gTTS top-level domain, since gTTS has no real gender option.
        transition_delay: seconds of silence appended after each slide's
            narration.

    Returns:
        (durations, path): per-slide durations in milliseconds (silence
        included) and the filesystem path of the combined MP3.

    Raises:
        ValueError: if slide_texts is empty.
    """
    if not slide_texts:
        # Guard: sum([]) below would yield int 0 and crash on .export().
        raise ValueError("No slide texts provided")

    audio_clips = []
    durations = []

    # Voice configuration matrix: gTTS exposes regional accents via the
    # `tld` parameter; different accents approximate different genders.
    tld_map = {
        'female': {
            'en': 'us',      # American English
            'es': 'es',      # European Spanish
            'fr': 'fr',      # French (France)
            'de': 'de',      # German (Germany)
            'ja': 'jp'       # Japanese
        },
        'male': {
            'en': 'co.uk',   # British English
            'es': 'com.mx',  # Mexican Spanish
            'fr': 'ca',      # Canadian French
            'de': 'at',      # Austrian German
            'ja': 'jp'       # Japanese (fallback)
        }
    }

    for text in slide_texts:
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
            try:
                # Fall back to the generic 'com' TLD for languages absent
                # from both maps (the UI offers 'zh-CN' and 'hi'); the old
                # chained lookup raised KeyError for those languages.
                tld = tld_map[gender].get(lang) or tld_map['female'].get(lang, 'com')
                tts = gTTS(
                    text=text,
                    lang=lang,
                    tld=tld,
                    slow=False
                )
                tts.save(fp.name)
                clip = AudioSegment.from_mp3(fp.name)
                # Append silence so the next slide doesn't start abruptly.
                silence = AudioSegment.silent(duration=transition_delay * 1000)
                clip_with_delay = clip + silence
                audio_clips.append(clip_with_delay)
                durations.append(len(clip_with_delay))
            finally:
                # Always remove the per-slide temp file, even when gTTS
                # (network call) or MP3 decoding fails.
                os.unlink(fp.name)

    # pydub AudioSegment supports sum(): 0 + segment is a no-op gain.
    combined_audio = sum(audio_clips)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        combined_audio.export(fp.name, format="mp3")
    return durations, fp.name
def add_background_music(voice_path, music_path, volume_reduction=25):
    """Overlay background music under the narration track.

    Args:
        voice_path: path to the narration MP3.
        music_path: path to a music file, or None/'' to skip mixing.
        volume_reduction: dB by which to attenuate the music so the
            narration stays intelligible.

    Returns:
        (duration_seconds, path): total duration of the final audio and
        the path of the exported MP3.

    Raises:
        ValueError: if the music file cannot be decoded.
    """
    voice = AudioSegment.from_mp3(voice_path)
    if music_path:
        try:
            music = AudioSegment.from_file(music_path)
            # Trim music to the narration length, fade out over the last
            # 2 s, then duck it under the voice.
            music = music[:len(voice)].fade_out(2000)
            music = music - volume_reduction
            final_audio = voice.overlay(music)
        except CouldntDecodeError as exc:
            # Chain the decode error so the root cause isn't lost
            # (the original raise discarded it).
            raise ValueError("Invalid music file format") from exc
    else:
        final_audio = voice
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        final_audio.export(fp.name, format="mp3")
    return len(final_audio) / 1000, fp.name
def create_video(img_paths, durations, audio_path):
    """Render a slideshow video whose slide timing matches the narration.

    Args:
        img_paths: paths of slide images, in display order.
        durations: per-slide display times in milliseconds (as returned
            by text_to_speech).
        audio_path: path of the final mixed audio track.

    Returns:
        Path of the rendered MP4 file.
    """
    clips = []
    video = None
    audio = None
    try:
        for img_path, duration in zip(img_paths, durations):
            # durations are in milliseconds; moviepy expects seconds.
            clips.append(ImageClip(img_path).set_duration(duration / 1000))
        video = concatenate_videoclips(clips, method="compose")
        audio = AudioFileClip(audio_path)
        video = video.set_audio(audio)
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as fp:
            video.write_videofile(fp.name, fps=24, threads=4)
        return fp.name
    finally:
        # Release the ffmpeg readers/handles moviepy keeps open; the
        # original leaked these on every render.
        if audio is not None:
            audio.close()
        if video is not None:
            video.close()
        for clip in clips:
            clip.close()
# ==================================================================
# Streamlit UI (Updated with language support warnings)
# ==================================================================
# NOTE(review): the emoji/checkmark strings below look mojibake'd
# (UTF-8 decoded as Latin-1) — confirm the source file's encoding.
st.title("PNG Slides to Video Maker πŸ–ΌοΈβž‘οΈπŸŽ₯")
st.markdown("Upload PNG slides, add scripts, and generate a video!")
# Language support warning: gender is emulated via regional accents,
# so only the languages listed here have a "male" option.
st.warning("""
**Voice Gender Support:**
βœ“ English (Male: British, Female: American)
βœ“ Spanish (Male: Mexican, Female: European)
βœ“ French (Male: Canadian, Female: France)
βœ“ German (Male: Austrian, Female: German)
βœ— Japanese/Others: Female only
""")
# Main file uploader — the rest of the page is gated on this input.
uploaded_images = st.file_uploader(
    "Step 1: Upload PNG Slides",
    type=["png"],
    accept_multiple_files=True,
    key="main_uploader"
)
if not uploaded_images:
    # Halt the script run until at least one slide is uploaded.
    st.info("ℹ️ Please upload PNG slides to begin")
    st.stop()
# Slide ordering
st.subheader("Step 2: Arrange Slide Order")
filenames = [img.name for img in uploaded_images]
# NOTE(review): multiselect also lets the user DESELECT entries, which
# silently drops slides from the video — confirm this is intended.
st.session_state.slide_order = st.multiselect(
    "Drag to reorder slides:",
    filenames,
    default=filenames,
    key="sort_slides"
)
# Reorder the uploaded files to match the chosen order. The nested scan
# is O(n^2) but n is tiny; duplicate filenames would be duplicated here.
uploaded_images = [img for name in st.session_state.slide_order
                   for img in uploaded_images if img.name == name]
# Video settings
st.subheader("Step 3: Video Settings")
col1, col2 = st.columns(2)
with col1:
    # Seconds of silence appended after each slide's narration.
    transition_delay = st.slider(
        "Transition Delay (seconds)",
        min_value=0,
        max_value=5,
        value=2,
        help="Silence between slides after voice finishes"
    )
with col2:
    # Passed to text_to_speech, which maps gender to a gTTS accent TLD.
    gender = st.selectbox(
        "Voice Gender",
        options=['female', 'male'],
        help="Gender selection for supported languages"
    )
# Language selector with full names; options are (label, code) tuples
# and format_func shows only the human-readable label.
lang = st.selectbox(
    "Voice Language",
    options=[
        ('English', 'en'),
        ('Spanish', 'es'),
        ('French', 'fr'),
        ('German', 'de'),
        ('Japanese', 'ja'),
        ('Chinese', 'zh-CN'),
        ('Hindi', 'hi')
    ],
    format_func=lambda x: x[0],
    index=0
)[1]  # selectbox returns the selected tuple; keep only the language code
# Script input — one text area per uploaded slide, in display order.
st.subheader("Step 4: Add Scripts")
slide_texts = []
with st.expander(f"Scripts for {len(uploaded_images)} Slides", expanded=True):
    for i, img in enumerate(uploaded_images):
        text = st.text_area(
            f"Slide {i+1} Text",
            key=f"slide_{i}",
            placeholder="Enter text for this slide...",
            height=100
        )
        # Strip whitespace so blank scripts fail the later validation.
        slide_texts.append(text.strip())
# Music settings
st.subheader("Step 5: Background Music (Optional)")
uploaded_music = st.file_uploader(
    "Upload MP3 file",
    type=["mp3"],
    key="music_uploader"
)
# The volume slider is only rendered once a music file is uploaded;
# otherwise the reduction defaults to 0 (unused downstream).
music_volume = st.slider(
    "Music Volume Reduction (dB)",
    0, 30, 25,
    help="Higher values make background music quieter"
) if uploaded_music else 0
# Generate button — runs the full text-to-speech -> mix -> render pipeline.
st.subheader("Step 6: Generate Video")
if st.button("πŸš€ Generate Video", use_container_width=True, type="primary"):
    # Validation: every slide needs a non-empty script before rendering.
    if len(slide_texts) != len(uploaded_images):
        st.error("Number of scripts doesn't match number of slides!")
        st.stop()
    if any(not text for text in slide_texts):
        st.error("All slides must have non-empty text!")
        st.stop()
    with st.spinner("Creating your video... This may take a minute ⏳"):
        # Track every temp file as soon as it is created so the cleanup in
        # `finally` runs even on failure; the original deleted files only
        # on the success path, leaking them in /tmp whenever a step raised.
        cleanup_files = []
        try:
            # 1. Save uploaded images to temp files for moviepy.
            img_paths = []
            for img in uploaded_images:
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
                    f.write(img.getbuffer())
                    img_paths.append(f.name)
            cleanup_files.extend(img_paths)

            # 2. Generate the narration track (with per-slide delays).
            durations, voice_path = text_to_speech(
                slide_texts,
                lang,
                gender,
                transition_delay
            )
            cleanup_files.append(voice_path)

            # 3. Mix in the optional background music.
            music_path = None
            if uploaded_music:
                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                    f.write(uploaded_music.getbuffer())
                    music_path = f.name
                cleanup_files.append(music_path)
            audio_duration, final_audio_path = add_background_music(
                voice_path,
                music_path,
                music_volume
            )
            cleanup_files.append(final_audio_path)

            # 4. Render the final video.
            video_path = create_video(img_paths, durations, final_audio_path)
            cleanup_files.append(video_path)

            # 5. Display the result inline.
            st.success("βœ… Video Ready! Play it below")
            st.video(video_path)
        except ValueError as e:
            st.error(f"Audio Error: {str(e)}")
        except Exception as e:
            st.error(f"Processing Error: {str(e)}")
        finally:
            # 6. Cleanup — remove every intermediate file we created.
            for f in cleanup_files:
                if os.path.exists(f):
                    os.unlink(f)