explorer7 committed on
Commit
b240c75
·
verified ·
1 Parent(s): d22c73a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -25
app.py CHANGED
@@ -1,43 +1,48 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  from gtts import gTTS
4
  from pydub import AudioSegment
5
  import numpy as np
6
- import io
 
7
 
8
- # Load Whisper TINY (multilingual) – CPU safe
9
- asr = pipeline(
10
- "automatic-speech-recognition",
11
- model="openai/whisper-tiny",
12
- device="cpu"
13
- )
14
 
15
  def speech_to_speech_translation(audio_path):
16
  if audio_path is None:
17
  return None
18
 
19
- # 1. Speech file → French text
20
- result = asr(
21
- audio_path,
22
- generate_kwargs={"task": "translate", "language": "french"}
23
- )
24
- french_text = result["text"]
25
-
26
- # 2. French text → French speech (free, lightweight)
27
- tts = gTTS(text=french_text, lang="fr")
28
- mp3_fp = io.BytesIO()
29
- tts.write_to_fp(mp3_fp)
30
- mp3_fp.seek(0)
31
-
32
- audio_seg = AudioSegment.from_file(mp3_fp, format="mp3")
33
- samples = np.array(audio_seg.get_array_of_samples()).astype(np.float32)
 
 
 
 
 
 
 
34
  samples /= np.max(np.abs(samples))
35
 
36
- return (audio_seg.frame_rate, samples)
 
 
37
 
38
  demo = gr.Interface(
39
  fn=speech_to_speech_translation,
40
- inputs=gr.Audio(type="filepath", label="Upload speech"),
41
  outputs=gr.Audio(type="numpy", label="French speech output"),
42
  title="Speech-to-Speech Translation (French)",
43
  allow_flagging="never"
 
1
  import gradio as gr
2
+ import speech_recognition as sr
3
  from gtts import gTTS
4
  from pydub import AudioSegment
5
  import numpy as np
6
+ import tempfile
7
+ import os
8
 
9
+ recognizer = sr.Recognizer()
 
 
 
 
 
10
 
def _normalize_peak(samples):
    """Scale *samples* in place so the loudest sample has magnitude 1.0."""
    # Empty or all-zero (silent) audio has no peak to scale by: np.max raises
    # on an empty array and dividing by a zero peak fills the output with NaN.
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples /= peak
    return samples


def speech_to_speech_translation(audio_path):
    """Recognize the speech in an audio file and speak the text with a French voice.

    Args:
        audio_path: Path to the input audio file, or ``None`` when no audio
            was supplied.

    Returns:
        ``None`` when ``audio_path`` is ``None``; otherwise a
        ``(frame_rate, samples)`` tuple where ``samples`` is a float32 NumPy
        array peak-normalized to the range [-1, 1] (left as-is when silent).

    Raises:
        Errors from pydub, the speech recognizer, or gTTS propagate
        unchanged (e.g. when the speech cannot be understood or the online
        services are unreachable); the temporary files are removed either way.

    NOTE(review): no translation step happens here. The recognized text is
    passed straight to a French TTS voice; ``recognize_google`` is called
    without a ``language`` argument, so it presumably recognizes English.
    Confirm whether a text-translation step is intended.
    """
    if audio_path is None:
        return None

    # A private scratch directory guarantees both intermediate files are
    # deleted even when recognition or synthesis raises.
    with tempfile.TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "input.wav")
        mp3_path = os.path.join(workdir, "output.mp3")

        # Convert input to mono 16 kHz wav for the recognizer.
        sound = AudioSegment.from_file(audio_path)
        sound = sound.set_channels(1).set_frame_rate(16000)
        # export() hands back the file object it wrote to; close it so the
        # handle does not linger while the file is read and later removed.
        sound.export(wav_path, format="wav").close()

        # Speech -> text (free Google STT)
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)

        # Text -> French-voice speech
        tts = gTTS(text=text, lang="fr")
        tts.save(mp3_path)

        # Decode fully into memory before the scratch directory disappears.
        audio_out = AudioSegment.from_file(mp3_path, format="mp3")
        samples = np.array(audio_out.get_array_of_samples()).astype(np.float32)
        frame_rate = audio_out.frame_rate

    return (frame_rate, _normalize_peak(samples))
42
 
43
  demo = gr.Interface(
44
  fn=speech_to_speech_translation,
45
+ inputs=gr.Audio(type="filepath", label="Input speech"),
46
  outputs=gr.Audio(type="numpy", label="French speech output"),
47
  title="Speech-to-Speech Translation (French)",
48
  allow_flagging="never"