TextToSpeech

Sleeping

App Files Files Community

hivecorp committed on Oct 20, 2024

Commit

513a56f

verified ·

1 Parent(s): 0692426

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -80

app.py CHANGED Viewed

@@ -1,92 +1,55 @@
 import gradio as gr
-import edge_tts
-import asyncio
-import tempfile
-import os
-from moviepy.editor import concatenate_videoclips, AudioFileClip, ImageClip, VideoFileClip
-# Get all available voices
-async def get_voices():
-    voices = await edge_tts.list_voices()
-    return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
-# Text-to-speech function
-async def text_to_speech(text, voice, rate, pitch):
-    if not text.strip():
-        return None, gr.Warning("Please enter the text to convert.")
-    if not voice:
-        return None, gr.Warning("Please select a voice.")
-    voice_short_name = voice.split(" - ")[0]
-    rate_str = f"{rate:+d}%"
-    pitch_str = f"{pitch:+d}Hz"
-    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
-        tmp_path = tmp_file.name
-        await communicate.save(tmp_path)
-    return tmp_path, None
-# Text-to-video function
 def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
-    # Generate audio from text
-    audio, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
-    if warning:
-        return None, warning
-    audio_clip = AudioFileClip(audio)
-    # Check if bg_media is None
-    if bg_media is None:
-        return None, gr.Warning("Please upload a background image or video.")
-    # Create background video or image
     if bg_media.endswith('.mp4'):
-        bg_clip = VideoFileClip(bg_media).resize(newsize=(video_width, video_height)).set_duration(audio_clip.duration)
     else:
-        bg_clip = ImageClip(bg_media).set_duration(audio_clip.duration).resize(newsize=(video_width, video_height))
-    # Set audio for the background
     final_video = bg_clip.set_audio(audio_clip)
-    final_video_path = os.path.join(tempfile.gettempdir(), "output_video.mp4")
-    final_video.write_videofile(final_video_path, fps=24, codec="libx264")
-    return final_video_path, None
-# Gradio interface function
-def tts_interface(text, voice, rate, pitch, bg_media, video_width, video_height):
-    video, warning = text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height)
-    return None, video, warning
-# Create Gradio app
-async def create_demo():
-    voices = await get_voices()
-    demo = gr.Interface(
-        fn=tts_interface,
-        inputs=[
-            gr.Textbox(label="Input Text", lines=5),
-            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
-            gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
-            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
-            gr.File(label="Upload Background Image or Video", type="filepath"),
-            gr.Slider(minimum=640, maximum=1920, value=1080, label="Video Width", step=10),
-            gr.Slider(minimum=480, maximum=1080, value=720, label="Video Height", step=10),
-        ],
-        outputs=[
-            gr.Audio(label="Generated Audio", type="filepath"),
-            gr.Video(label="Generated Video"),
-            gr.Markdown(label="Warning", visible=False)
-        ],
-        title="Edge TTS Text to Speech and Video",
-        description="Convert text to speech and video using Microsoft Edge TTS. Upload an image or video for the background.",
-        analytics_enabled=False,
-        allow_flagging=False,
-    )
-    return demo
-# Run the application
-if __name__ == "__main__":
-    demo = asyncio.run(create_demo())
-    demo.launch(share=True)  # Set share=True for public link

 import gradio as gr
+from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
+# Placeholder for the text-to-speech step.
+def generate_audio(text, voice, rate, pitch):
+    """Synthesize speech for *text* and return it as an audio clip.
+
+    ``text_to_video`` reads ``.duration`` on the returned value and passes it
+    to ``set_audio``, so a real implementation must return a moviepy audio
+    clip (for example an ``AudioFileClip``).
+
+    Raises:
+        NotImplementedError: always, until a speech backend is wired in.
+    """
+    # Fail loudly: silently returning None only surfaces later as an
+    # AttributeError on ``audio_clip.duration`` in the caller.
+    raise NotImplementedError("generate_audio has no speech backend implemented yet.")
+# Function to create video from text and background media
 def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
+    """Render narration of *text* over a background image or video.
+
+    Args:
+        text, voice, rate, pitch: forwarded unchanged to ``generate_audio``.
+        bg_media: path to the background file, or an upload object exposing
+            the path as ``.name``. ``None`` means nothing was uploaded.
+        video_width, video_height: output frame size; the background is only
+            resized when both are given.
+
+    Returns:
+        ``(output_path, None)`` on success, ``(None, message)`` when the
+        request cannot be fulfilled.
+    """
+    if bg_media is None:
+        return None, "Please upload a background image or video."
+    # Accept both a plain path and a file wrapper carrying the path in .name.
+    media_path = getattr(bg_media, "name", bg_media)
+    # Compare the extension case-insensitively so "PHOTO.JPG" is accepted.
+    lowered = media_path.lower()
+    is_video = lowered.endswith('.mp4')
+    if not is_video and not lowered.endswith(('.jpg', '.png', '.jpeg')):
+        # Reject before synthesizing audio so no work is wasted.
+        return None, "Unsupported media type."
+    # Generate the audio clip
+    audio_clip = generate_audio(text, voice, rate, pitch)
+    if audio_clip is None:
+        return None, "Audio generation produced no audio."
+    # Keep a handle on the clip that owns the file reader so it can be closed.
+    source_clip = VideoFileClip(media_path) if is_video else ImageClip(media_path)
+    output_file = "output_video.mp4"
+    try:
+        bg_clip = source_clip.set_duration(audio_clip.duration)
+        if video_width is not None and video_height is not None:
+            bg_clip = bg_clip.resize(newsize=(video_width, video_height))
+        # Create a final video with audio
+        final_video = bg_clip.set_audio(audio_clip)
+        # Still images carry no frame rate of their own; fall back to 24 fps
+        # so write_videofile does not fail for image backgrounds.
+        fps = getattr(final_video, "fps", None) or 24
+        final_video.write_videofile(output_file, fps=fps, codec='libx264')
+    finally:
+        # Release the readers held by the clips.
+        source_clip.close()
+        audio_clip.close()
+    return output_file, None
+# Gradio interface
+def tts_interface(text, voice, rate, pitch, bg_media):
+    """Gradio callback: build the narrated video and return its file path.
+
+    Raises:
+        gr.Error: carrying the warning text when ``text_to_video`` reports a
+            problem instead of a video.
+    """
+    video, warning = text_to_video(text, voice, rate, pitch, bg_media, None, None)
+    if warning:
+        # The interface's only output is a file component, so returning the
+        # message would be treated as a path. gr.Error shows it to the user.
+        raise gr.Error(warning)
+    return video
+# Input widgets, listed in the positional order tts_interface expects.
+input_components = [
+    gr.Textbox(label="Text"),
+    gr.Dropdown(label="Voice", choices=["Voice 1", "Voice 2"]),  # Update with actual voices
+    gr.Slider(label="Rate", minimum=0.5, maximum=2.0, step=0.1, value=1.0),
+    gr.Slider(label="Pitch", minimum=0, maximum=100, step=1, value=50),
+    gr.File(label="Background Media (Image/Video)"),
+]
+# Single file output: whatever path the callback returns.
+iface = gr.Interface(
+    fn=tts_interface,
+    inputs=input_components,
+    outputs="file",
+    title="Text to Video with Audio",
+    description="Upload an image or video and generate a video with audio from text.",
+)
+# Runs at import time and requests a public share link.
+iface.launch(share=True)