File size: 8,678 Bytes
10984db 89860f7 10984db 89860f7 10984db 67f3c64 f89322d 10984db 2b0ac50 10984db 895252e 2b0ac50 895252e 2b0ac50 895252e 2b0ac50 895252e 2b0ac50 895252e 2b0ac50 895252e 2b0ac50 895252e 10984db 2b0ac50 10984db d8392ee 67f3c64 2b0ac50 895252e d8392ee 895252e d8392ee 895252e d8392ee 2b0ac50 895252e 2b0ac50 895252e 2b0ac50 895252e d8392ee 895252e 67f3c64 895252e 89860f7 895252e 89860f7 895252e 89860f7 895252e 67f3c64 895252e 89860f7 895252e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 |
import os
import tempfile
import streamlit as st
from gtts import gTTS
from pydub import AudioSegment
from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip
from pydub.exceptions import CouldntDecodeError
# Hugging Face Spaces configuration
tempfile.tempdir = "/tmp"
# ==================================================================
# Core Functions (Updated with working gender voices)
# ==================================================================
def text_to_speech(slide_texts, lang='en', gender='female', transition_delay=0):
    """Convert slide scripts to one combined voice-over MP3 via gTTS.

    gTTS has no true gender parameter, so "gender" is approximated by
    routing the request through a regional Google TTS domain (TLD).

    Args:
        slide_texts: list of narration strings, one per slide.
        lang: gTTS language code (e.g. 'en', 'es', 'zh-CN', 'hi').
        gender: 'female' or 'male' accent preference.
        transition_delay: seconds of silence appended after each slide.

    Returns:
        (durations, path): per-slide lengths in milliseconds (narration
        plus trailing silence) and the path of the combined MP3 file.
        The caller is responsible for deleting the file.

    Raises:
        ValueError: if slide_texts is empty.
    """
    if not slide_texts:
        # sum([]) would yield int 0 and crash on .export with an opaque
        # AttributeError; fail early with a clear message instead.
        raise ValueError("No slide texts provided")
    audio_clips = []
    durations = []
    # Verified voice configuration matrix: accent TLD per gender/language.
    tld_map = {
        'female': {
            'en': 'us',      # American English
            'es': 'es',      # European Spanish
            'fr': 'fr',      # French (France)
            'de': 'de',      # German (Germany)
            'ja': 'jp'       # Japanese
        },
        'male': {
            'en': 'co.uk',   # British English
            'es': 'com.mx',  # Mexican Spanish
            'fr': 'ca',      # Canadian French
            'de': 'at',      # Austrian German
            'ja': 'jp'       # Japanese (fallback)
        }
    }
    for text in slide_texts:
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
            try:
                # Prefer the requested gender's accent, fall back to the
                # female accent, then to the generic 'com' domain for
                # languages absent from the matrix (e.g. 'zh-CN', 'hi').
                # The previous tld_map['female'][lang] fallback raised
                # KeyError for those languages even though the UI offers
                # them.
                tld = tld_map[gender].get(lang, tld_map['female'].get(lang, 'com'))
                tts = gTTS(
                    text=text,
                    lang=lang,
                    tld=tld,
                    slow=False
                )
                tts.save(fp.name)
                clip = AudioSegment.from_mp3(fp.name)
                # Add transition delay as silence after the narration.
                silence = AudioSegment.silent(duration=transition_delay * 1000)
                clip_with_delay = clip + silence
                audio_clips.append(clip_with_delay)
                durations.append(len(clip_with_delay))
            finally:
                # Remove the per-slide scratch MP3 even if gTTS fails.
                os.unlink(fp.name)
    # AudioSegment supports sum() for concatenation.
    combined_audio = sum(audio_clips)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        combined_audio.export(fp.name, format="mp3")
    return durations, fp.name
def add_background_music(voice_path, music_path, volume_reduction=25):
    """Mix a voice-over track with optional background music.

    The music bed is trimmed to the voice length, given a 2-second
    fade-out, attenuated by ``volume_reduction`` dB, and overlaid under
    the voice. With no music path, the voice passes through unchanged.

    Returns:
        (duration_seconds, mp3_path) — caller must delete the file.

    Raises:
        ValueError: if the music file cannot be decoded.
    """
    voice = AudioSegment.from_mp3(voice_path)
    if not music_path:
        final_audio = voice
    else:
        try:
            bed = AudioSegment.from_file(music_path)
            # Trim to the narration length, fade the tail, duck the level.
            bed = bed[:len(voice)].fade_out(2000) - volume_reduction
            final_audio = voice.overlay(bed)
        except CouldntDecodeError:
            raise ValueError("Invalid music file format")
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out:
        final_audio.export(out.name, format="mp3")
    return len(final_audio) / 1000, out.name
def create_video(img_paths, durations, audio_path):
    """Render an MP4 slideshow whose slides last as long as their audio.

    ``durations`` are per-slide lengths in milliseconds; the combined
    soundtrack at ``audio_path`` is attached to the finished video.

    Returns:
        Path of the rendered MP4 file (caller must delete it).
    """
    # One still-image clip per slide, held for its narration duration.
    slides = [
        ImageClip(path).set_duration(ms / 1000)
        for path, ms in zip(img_paths, durations)
    ]
    movie = concatenate_videoclips(slides, method="compose")
    movie = movie.set_audio(AudioFileClip(audio_path))
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out:
        movie.write_videofile(out.name, fps=24, threads=4)
    return out.name
# ==================================================================
# Streamlit UI (Updated with language support warnings)
# ==================================================================
# Page header.
st.title("PNG Slides to Video Maker 🖼️➡️🎥")
st.markdown("Upload PNG slides, add scripts, and generate a video!")
# Language support warning
# Tells users up-front which language/gender combinations the gTTS
# accent matrix in text_to_speech() actually covers.
st.warning("""
**Voice Gender Support:**
✓ English (Male: British, Female: American)
✓ Spanish (Male: Mexican, Female: European)
✓ French (Male: Canadian, Female: France)
✓ German (Male: Austrian, Female: German)
✗ Japanese/Others: Female only
""")
# Main file uploader
uploaded_images = st.file_uploader(
    "Step 1: Upload PNG Slides",
    type=["png"],
    accept_multiple_files=True,
    key="main_uploader"
)
# Streamlit reruns top-to-bottom; halt here until slides are uploaded.
if not uploaded_images:
    st.info("ℹ️ Please upload PNG slides to begin")
    st.stop()
# Slide ordering
st.subheader("Step 2: Arrange Slide Order")
filenames = [img.name for img in uploaded_images]
# NOTE(review): st.multiselect does not support drag-reordering despite
# the label — order follows selection order, and deselecting a name
# silently drops that slide from the video. Confirm this is intended.
st.session_state.slide_order = st.multiselect(
    "Drag to reorder slides:",
    filenames,
    default=filenames,
    key="sort_slides"
)
# Re-sort the uploaded files to match the chosen name order.
uploaded_images = [img for name in st.session_state.slide_order
                   for img in uploaded_images if img.name == name]
# Video settings
st.subheader("Step 3: Video Settings")
col1, col2 = st.columns(2)
with col1:
    # Seconds of silence appended after each slide's narration
    # (passed through to text_to_speech).
    transition_delay = st.slider(
        "Transition Delay (seconds)",
        min_value=0,
        max_value=5,
        value=2,
        help="Silence between slides after voice finishes"
    )
with col2:
    # Selects a regional accent in text_to_speech's TLD matrix; gTTS
    # has no true gender parameter.
    gender = st.selectbox(
        "Voice Gender",
        options=['female', 'male'],
        help="Gender selection for supported languages"
    )
# Language selector with full names
# Options are (display label, gTTS code) pairs; format_func shows the
# label while the trailing [1] keeps only the language code.
lang = st.selectbox(
    "Voice Language",
    options=[
        ('English', 'en'),
        ('Spanish', 'es'),
        ('French', 'fr'),
        ('German', 'de'),
        ('Japanese', 'ja'),
        ('Chinese', 'zh-CN'),
        ('Hindi', 'hi')
    ],
    format_func=lambda x: x[0],
    index=0
)[1] # Get language code
# Script input
st.subheader("Step 4: Add Scripts")
slide_texts = []
# One text area per slide, in the order chosen above. Indexed keys let
# Streamlit preserve each box's contents across reruns.
with st.expander(f"Scripts for {len(uploaded_images)} Slides", expanded=True):
    for i, img in enumerate(uploaded_images):
        text = st.text_area(
            f"Slide {i+1} Text",
            key=f"slide_{i}",
            placeholder="Enter text for this slide...",
            height=100
        )
        slide_texts.append(text.strip())
# Music settings
st.subheader("Step 5: Background Music (Optional)")
uploaded_music = st.file_uploader(
    "Upload MP3 file",
    type=["mp3"],
    key="music_uploader"
)
# The volume slider is only rendered once a music file is uploaded;
# otherwise the reduction defaults to 0 dB.
music_volume = st.slider(
    "Music Volume Reduction (dB)",
    0, 30, 25,
    help="Higher values make background music quieter"
) if uploaded_music else 0
# Generate button
st.subheader("Step 6: Generate Video")
if st.button("🚀 Generate Video", use_container_width=True, type="primary"):
    # Validation
    if len(slide_texts) != len(uploaded_images):
        st.error("Number of scripts doesn't match number of slides!")
        st.stop()
    if any(not text for text in slide_texts):
        st.error("All slides must have non-empty text!")
        st.stop()
    with st.spinner("Creating your video... This may take a minute ⏳"):
        # Register every temp file as it is created so cleanup runs even
        # when a step raises. (Previously cleanup ran only on the success
        # path, leaking files in /tmp on every failure.)
        cleanup_files = []
        try:
            # 1. Save images to temp files
            img_paths = []
            for img in uploaded_images:
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
                    f.write(img.getbuffer())
                    img_paths.append(f.name)
            cleanup_files.extend(img_paths)
            # 2. Generate voiceover with delays
            durations, voice_path = text_to_speech(
                slide_texts,
                lang,
                gender,
                transition_delay
            )
            cleanup_files.append(voice_path)
            # 3. Process background music
            music_path = None
            if uploaded_music:
                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                    f.write(uploaded_music.getbuffer())
                    music_path = f.name
                cleanup_files.append(music_path)
            audio_duration, final_audio_path = add_background_music(
                voice_path,
                music_path,
                music_volume
            )
            cleanup_files.append(final_audio_path)
            # 4. Create video
            video_path = create_video(img_paths, durations, final_audio_path)
            cleanup_files.append(video_path)
            # 5. Display result — st.video ingests the file before the
            # finally block deletes it (matching the original, which also
            # deleted video_path right after displaying).
            st.success("✅ Video Ready! Play it below")
            st.video(video_path)
        except ValueError as e:
            st.error(f"Audio Error: {str(e)}")
        except Exception as e:
            st.error(f"Processing Error: {str(e)}")
        finally:
            # 6. Cleanup every temp file created so far
            for f in cleanup_files:
                if os.path.exists(f):
                    os.unlink(f)