|
|
import os |
|
|
import tempfile |
|
|
import streamlit as st |
|
|
from gtts import gTTS |
|
|
from pydub import AudioSegment |
|
|
from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip |
|
|
from pydub.exceptions import CouldntDecodeError |
|
|
|
|
|
|
|
|
# Force all temporary files into /tmp — presumably targets a hosted Linux
# environment with ephemeral /tmp storage (TODO confirm deployment target).
tempfile.tempdir = "/tmp"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def text_to_speech(slide_texts, lang='en', gender='female', transition_delay=0):
    """Convert per-slide scripts into one combined narration MP3 via gTTS.

    Args:
        slide_texts: list of narration strings, one per slide.
        lang: gTTS language code (e.g. 'en', 'es', 'zh-CN').
        gender: 'female' or 'male'. gTTS has no true gender option, so this
            is approximated by choosing a regional accent (top-level domain).
        transition_delay: seconds of silence appended after each slide's
            narration.

    Returns:
        (durations, path): per-slide durations in milliseconds (silence
        included) and the filesystem path of the combined MP3.

    Raises:
        ValueError: if slide_texts is empty.
    """
    audio_clips = []
    durations = []

    # Accent (TLD) tables used to approximate voice gender per language.
    tld_map = {
        'female': {
            'en': 'us',
            'es': 'es',
            'fr': 'fr',
            'de': 'de',
            'ja': 'jp'
        },
        'male': {
            'en': 'co.uk',
            'es': 'com.mx',
            'fr': 'ca',
            'de': 'at',
            'ja': 'jp'
        }
    }

    for text in slide_texts:
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
            try:
                # BUG FIX: the previous fallback `tld_map['female'][lang]`
                # raised KeyError for languages missing from the map
                # ('zh-CN', 'hi' — both offered by the UI). Fall back to
                # the generic 'com' domain instead.
                tld = tld_map.get(gender, {}).get(
                    lang, tld_map['female'].get(lang, 'com'))

                tts = gTTS(
                    text=text,
                    lang=lang,
                    tld=tld,
                    slow=False
                )
                tts.save(fp.name)
                clip = AudioSegment.from_mp3(fp.name)

                # Pad each slide with silence so the video lingers on the
                # image after the voice finishes.
                silence = AudioSegment.silent(duration=transition_delay * 1000)
                clip_with_delay = clip + silence

                audio_clips.append(clip_with_delay)
                durations.append(len(clip_with_delay))  # milliseconds
            finally:
                # The per-slide MP3 has been loaded into memory; drop it.
                os.unlink(fp.name)

    if not audio_clips:
        raise ValueError("No slide text provided")

    # pydub's AudioSegment supports sum() (starts from int 0 via __radd__).
    combined_audio = sum(audio_clips)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        combined_audio.export(fp.name, format="mp3")
        return durations, fp.name
|
|
|
|
|
def add_background_music(voice_path, music_path, volume_reduction=25):
    """Lay optional background music under a voice-over track.

    Args:
        voice_path: path to the narration MP3.
        music_path: path to a music file, or a falsy value for no music.
        volume_reduction: decibels by which the music is attenuated.

    Returns:
        (duration_seconds, path): length of the mixed audio in seconds and
        the path of the exported MP3.

    Raises:
        ValueError: if the music file cannot be decoded.
    """
    narration = AudioSegment.from_mp3(voice_path)

    if not music_path:
        mixed = narration
    else:
        try:
            # Trim the music to the narration length, fade it out over the
            # final 2 s, attenuate it, then overlay it under the voice.
            backing = AudioSegment.from_file(music_path)
            backing = backing[:len(narration)].fade_out(2000)
            backing = backing - volume_reduction
            mixed = narration.overlay(backing)
        except CouldntDecodeError:
            raise ValueError("Invalid music file format")

    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out:
        mixed.export(out.name, format="mp3")
        return len(mixed) / 1000, out.name
|
|
|
|
|
def create_video(img_paths, durations, audio_path):
    """Assemble still-image clips into a single video with a soundtrack.

    Args:
        img_paths: slide image paths, already in display order.
        durations: per-slide display times in milliseconds.
        audio_path: audio file to attach to the final video.

    Returns:
        Path of the rendered MP4 file.
    """
    # One still clip per slide; durations arrive in ms, moviepy wants seconds.
    slides = [
        ImageClip(path).set_duration(ms / 1000)
        for path, ms in zip(img_paths, durations)
    ]

    movie = concatenate_videoclips(slides, method="compose")
    movie = movie.set_audio(AudioFileClip(audio_path))

    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out:
        movie.write_videofile(out.name, fps=24, threads=4)
        return out.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Page header and capability notice ---
st.title("PNG Slides to Video Maker πΌοΈβ‘οΈπ₯")
st.markdown("Upload PNG slides, add scripts, and generate a video!")

# Summarize which language/gender pairs map to distinct gTTS accents
# (mirrors the tld_map inside text_to_speech).
st.warning("""
**Voice Gender Support:**
β English (Male: British, Female: American)
β Spanish (Male: Mexican, Female: European)
β French (Male: Canadian, Female: France)
β German (Male: Austrian, Female: German)
β Japanese/Others: Female only
""")
|
|
|
|
|
|
|
|
# --- Step 1: slide upload ---
uploaded_images = st.file_uploader(
    "Step 1: Upload PNG Slides",
    type=["png"],
    accept_multiple_files=True,
    key="main_uploader"
)

# Nothing below makes sense without slides; halt the script run here.
if not uploaded_images:
    st.info("βΉοΈ Please upload PNG slides to begin")
    st.stop()
|
|
|
|
|
|
|
|
# --- Step 2: slide ordering ---
# NOTE(review): st.multiselect also lets the user *remove* entries, not
# just reorder them — deselected slides are silently dropped below.
st.subheader("Step 2: Arrange Slide Order")
filenames = [img.name for img in uploaded_images]
st.session_state.slide_order = st.multiselect(
    "Drag to reorder slides:",
    filenames,
    default=filenames,
    key="sort_slides"
)
# Re-sort the uploaded files to match the user-chosen name order.
uploaded_images = [img for name in st.session_state.slide_order
                   for img in uploaded_images if img.name == name]
|
|
|
|
|
|
|
|
# --- Step 3: timing and voice settings ---
st.subheader("Step 3: Video Settings")
col1, col2 = st.columns(2)
with col1:
    # Seconds of silence appended after each slide's narration
    # (consumed by text_to_speech as transition_delay).
    transition_delay = st.slider(
        "Transition Delay (seconds)",
        min_value=0,
        max_value=5,
        value=2,
        help="Silence between slides after voice finishes"
    )
with col2:
    # Approximate voice gender; mapped to gTTS regional accents.
    gender = st.selectbox(
        "Voice Gender",
        options=['female', 'male'],
        help="Gender selection for supported languages"
    )
|
|
|
|
|
|
|
|
# Language picker: options are (label, code) pairs — format_func shows
# only the label, and the trailing [1] extracts the gTTS language code.
lang = st.selectbox(
    "Voice Language",
    options=[
        ('English', 'en'),
        ('Spanish', 'es'),
        ('French', 'fr'),
        ('German', 'de'),
        ('Japanese', 'ja'),
        ('Chinese', 'zh-CN'),
        ('Hindi', 'hi')
    ],
    format_func=lambda x: x[0],
    index=0
)[1]
|
|
|
|
|
|
|
|
# --- Step 4: per-slide narration scripts ---
st.subheader("Step 4: Add Scripts")
slide_texts = []
with st.expander(f"Scripts for {len(uploaded_images)} Slides", expanded=True):
    for i, img in enumerate(uploaded_images):
        # One text area per slide; keys are positional, so reordering
        # slides does not move previously entered text with them.
        text = st.text_area(
            f"Slide {i+1} Text",
            key=f"slide_{i}",
            placeholder="Enter text for this slide...",
            height=100
        )
        slide_texts.append(text.strip())
|
|
|
|
|
|
|
|
# --- Step 5: optional background music ---
st.subheader("Step 5: Background Music (Optional)")
uploaded_music = st.file_uploader(
    "Upload MP3 file",
    type=["mp3"],
    key="music_uploader"
)
# The volume slider is rendered only after a music file is uploaded;
# otherwise 0 is passed (harmless — music_path stays None downstream).
music_volume = st.slider(
    "Music Volume Reduction (dB)",
    0, 30, 25,
    help="Higher values make background music quieter"
) if uploaded_music else 0
|
|
|
|
|
|
|
|
# --- Step 6: orchestrate TTS, mixing, and rendering ---
st.subheader("Step 6: Generate Video")
if st.button("π Generate Video", use_container_width=True, type="primary"):

    # Validate inputs before doing any expensive work.
    if len(slide_texts) != len(uploaded_images):
        st.error("Number of scripts doesn't match number of slides!")
        st.stop()

    if any(not text for text in slide_texts):
        st.error("All slides must have non-empty text!")
        st.stop()

    with st.spinner("Creating your video... This may take a minute β³"):
        # BUG FIX: track temp files as they are created so cleanup also
        # runs on failure — the original deleted them only on success,
        # leaking files whenever an exception fired.
        cleanup_files = []
        try:
            # Persist uploaded slides to disk for moviepy.
            img_paths = []
            for img in uploaded_images:
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
                    f.write(img.getbuffer())
                    img_paths.append(f.name)
            cleanup_files.extend(img_paths)

            # Narration track + per-slide durations (ms).
            durations, voice_path = text_to_speech(
                slide_texts,
                lang,
                gender,
                transition_delay
            )
            cleanup_files.append(voice_path)

            # Optional background music file.
            music_path = None
            if uploaded_music:
                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                    f.write(uploaded_music.getbuffer())
                    music_path = f.name
                cleanup_files.append(music_path)

            audio_duration, final_audio_path = add_background_music(
                voice_path,
                music_path,
                music_volume
            )
            cleanup_files.append(final_audio_path)

            video_path = create_video(img_paths, durations, final_audio_path)
            cleanup_files.append(video_path)

            # BUG FIX: the original call contained a raw newline inside a
            # single-quoted string literal (a SyntaxError); rejoined here.
            st.success("β Video Ready! Play it below")
            # st.video reads the file at call time, so deleting it in the
            # finally block afterwards is safe (the original did the same).
            st.video(video_path)

        except ValueError as e:
            st.error(f"Audio Error: {str(e)}")
        except Exception as e:
            st.error(f"Processing Error: {str(e)}")
        finally:
            for f in cleanup_files:
                if os.path.exists(f):
                    os.unlink(f)