Spaces:

explorer7
/

lini

Runtime error

explorer7 committed 25 days ago

Commit

b240c75

verified ·

1 Parent(s): d22c73a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,43 +1,48 @@
 import gradio as gr
-from transformers import pipeline
 from gtts import gTTS
 from pydub import AudioSegment
 import numpy as np
-import io
-# Load Whisper TINY (multilingual) – CPU safe
-asr = pipeline(
-    "automatic-speech-recognition",
-    model="openai/whisper-tiny",
-    device="cpu"
-)
 def speech_to_speech_translation(audio_path):
     if audio_path is None:
         return None
-    # 1. Speech file → French text
-    result = asr(
-        audio_path,
-        generate_kwargs={"task": "translate", "language": "french"}
-    )
-    french_text = result["text"]
-    # 2. French text → French speech (free, lightweight)
-    tts = gTTS(text=french_text, lang="fr")
-    mp3_fp = io.BytesIO()
-    tts.write_to_fp(mp3_fp)
-    mp3_fp.seek(0)
-    audio_seg = AudioSegment.from_file(mp3_fp, format="mp3")
-    samples = np.array(audio_seg.get_array_of_samples()).astype(np.float32)
     samples /= np.max(np.abs(samples))
-    return (audio_seg.frame_rate, samples)
 demo = gr.Interface(
     fn=speech_to_speech_translation,
-    inputs=gr.Audio(type="filepath", label="Upload speech"),
     outputs=gr.Audio(type="numpy", label="French speech output"),
     title="Speech-to-Speech Translation (French)",
     allow_flagging="never"

 import gradio as gr
+import speech_recognition as sr
 from gtts import gTTS
 from pydub import AudioSegment
 import numpy as np
+import tempfile
+import os
+recognizer = sr.Recognizer()
 def speech_to_speech_translation(audio_path):
     if audio_path is None:
         return None
+    # Convert input to wav
+    sound = AudioSegment.from_file(audio_path)
+    sound = sound.set_channels(1).set_frame_rate(16000)
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        sound.export(f.name, format="wav")
+        wav_path = f.name
+    # Speech → English text (free Google STT)
+    with sr.AudioFile(wav_path) as source:
+        audio_data = recognizer.record(source)
+        text = recognizer.recognize_google(audio_data)
+    os.remove(wav_path)
+    # English → French speech
+    tts = gTTS(text=text, lang="fr")
+    mp3_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
+    tts.save(mp3_path)
+    audio_out = AudioSegment.from_file(mp3_path, format="mp3")
+    samples = np.array(audio_out.get_array_of_samples()).astype(np.float32)
     samples /= np.max(np.abs(samples))
+    os.remove(mp3_path)
+    return (audio_out.frame_rate, samples)
 demo = gr.Interface(
     fn=speech_to_speech_translation,
+    inputs=gr.Audio(type="filepath", label="Input speech"),
     outputs=gr.Audio(type="numpy", label="French speech output"),
     title="Speech-to-Speech Translation (French)",
     allow_flagging="never"