File size: 1,495 Bytes
85738d7 b240c75 85738d7 b240c75 85738d7 b240c75 85738d7 22fbec3 85738d7 b240c75 85738d7 b240c75 85738d7 b240c75 22fbec3 85738d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
import numpy as np
import tempfile
import os
# Module-level recognizer, created once at import time and reused by
# speech_to_speech_translation for both reading the WAV file and calling
# the Google speech-to-text endpoint.
recognizer = sr.Recognizer()
def speech_to_speech_translation(audio_path):
    """Transcribe an audio file and re-synthesize the text with a French voice.

    The input is converted to 16 kHz mono WAV, transcribed with the
    module-level ``recognizer`` via ``recognize_google``, and the resulting
    text is spoken by gTTS with ``lang="fr"``.

    NOTE(review): no translation step is present in this function. The
    recognized text is handed to gTTS unchanged, so it is only *read* with a
    French voice. Confirm whether an actual text translation was intended.

    Args:
        audio_path: Path to the input audio file, or ``None`` when the
            caller has no audio to process.

    Returns:
        ``None`` if ``audio_path`` is ``None``; otherwise a tuple
        ``(frame_rate, samples)`` where ``samples`` is a ``float32`` NumPy
        array peak-normalized to the range [-1, 1]. Silent or empty output
        is returned unscaled instead of being divided by zero.

    Raises:
        Exceptions from the underlying libraries propagate unchanged (for
        example ``sr.UnknownValueError`` / ``sr.RequestError`` from the
        recognizer). Temporary files are removed even when that happens.
    """
    if audio_path is None:
        return None

    # Convert input to 16 kHz mono WAV so sr.AudioFile can read it.
    sound = AudioSegment.from_file(audio_path)
    sound = sound.set_channels(1).set_frame_rate(16000)

    # Reserve a temp path and close our handle before anything else writes
    # to it; delete=False means we are responsible for removing the file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name
    try:
        # export() returns an open file handle; close it right away.
        sound.export(wav_path, format="wav").close()

        # Speech -> text (free Google STT).
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)
    finally:
        # Runs on success and on failure, so the WAV is never left behind.
        os.remove(wav_path)

    # Text -> speech with the French gTTS voice.
    tts = gTTS(text=text, lang="fr")
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        mp3_path = f.name
    try:
        tts.save(mp3_path)
        # from_file decodes the whole clip into memory, so the MP3 can be
        # deleted as soon as this call returns.
        audio_out = AudioSegment.from_file(mp3_path, format="mp3")
    finally:
        os.remove(mp3_path)

    samples = np.array(audio_out.get_array_of_samples()).astype(np.float32)

    # Peak-normalize, guarding against empty or all-zero audio which would
    # otherwise raise in np.max or produce NaN/inf from a division by zero.
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples /= peak

    return (audio_out.frame_rate, samples)
# Gradio UI: one audio input (delivered to the handler as a file path) and
# one audio output (returned by the handler as a (rate, ndarray) tuple).
demo = gr.Interface(
    title="Speech-to-Speech Translation (French)",
    fn=speech_to_speech_translation,
    inputs=gr.Audio(label="Input speech", type="filepath"),
    outputs=gr.Audio(label="French speech output", type="numpy"),
    allow_flagging="never",
)


# Start the web server only when this file is run as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()
|