mishiawan committed on
Commit
4b0dd9e
·
verified ·
1 Parent(s): 3fe01d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -83
app.py CHANGED
@@ -1,98 +1,82 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
- import speech_recognition as sr
 
4
  from gtts import gTTS
5
- import tempfile
6
  import os
7
- import base64
8
 
9
- # Initialize the translation pipeline
10
  @st.cache_resource
11
- def load_translation_pipeline(model_name):
12
- return pipeline("translation", model=model_name)
13
 
14
- # Initialize speech recognition
15
- recognizer = sr.Recognizer()
16
 
17
- def translate_text(input_text, model):
18
- return model(input_text)[0]["translation_text"]
19
-
20
- def speech_to_text(audio_file):
21
- with sr.AudioFile(audio_file) as source:
22
- audio_data = recognizer.record(source)
23
- return recognizer.recognize_google(audio_data)
24
-
25
- def text_to_speech(text, language):
26
- tts = gTTS(text=text, lang=language)
27
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
28
- tts.save(temp_file.name)
29
- return temp_file.name
30
-
31
- # Streamlit app
32
- st.title("Real-Time Language Translator")
33
- st.write("Translate voice and text between multiple languages in real-time!")
34
-
35
- # Language selection
36
- st.sidebar.header("Settings")
37
- input_lang = st.sidebar.selectbox("Select Input Language", ["English", "French", "Spanish", "German", "Hindi"])
38
- output_lang = st.sidebar.selectbox("Select Output Language", ["English", "French", "Spanish", "German", "Hindi"])
39
-
40
- # Language codes mapping
41
- lang_codes = {
42
  "English": "en",
43
  "French": "fr",
44
  "Spanish": "es",
45
  "German": "de",
46
- "Hindi": "hi"
 
 
 
 
47
  }
48
 
49
- input_code = lang_codes[input_lang]
50
- output_code = lang_codes[output_lang]
 
 
 
 
 
 
 
 
 
51
 
52
- # Model selection
53
- model_name = f"Helsinki-NLP/opus-mt-{input_code}-{output_code}"
54
- translation_pipeline = load_translation_pipeline(model_name)
55
-
56
- # Input options
57
- st.header("Input Options")
58
- input_mode = st.radio("Choose Input Mode:", ["Text", "Voice"])
59
-
60
- if input_mode == "Text":
61
- input_text = st.text_area(f"Enter text in {input_lang}:")
62
- if st.button("Translate"):
63
- if input_text.strip():
64
- translated_text = translate_text(input_text, translation_pipeline)
65
- st.success(f"Translated Text in {output_lang}: {translated_text}")
66
-
67
- # Option to download translation as audio
68
- if st.checkbox("Play Translated Audio"):
69
- audio_file = text_to_speech(translated_text, output_code)
70
- audio_bytes = open(audio_file, "rb").read()
71
- st.audio(audio_bytes, format="audio/mp3")
72
-
73
- # Provide download link
74
- b64 = base64.b64encode(audio_bytes).decode()
75
- href = f'<a href="data:audio/mp3;base64,{b64}" download="translation.mp3">Download Translated Audio</a>'
76
- st.markdown(href, unsafe_allow_html=True)
77
- else:
78
- audio_file = st.file_uploader("Upload an audio file (WAV format)", type=["wav"])
79
- if audio_file is not None:
80
- if st.button("Translate"):
81
- try:
82
- input_text = speech_to_text(audio_file)
83
- st.write(f"Recognized Text in {input_lang}: {input_text}")
84
- translated_text = translate_text(input_text, translation_pipeline)
85
- st.success(f"Translated Text in {output_lang}: {translated_text}")
86
-
87
- # Option to download translation as audio
88
- if st.checkbox("Play Translated Audio"):
89
- audio_file = text_to_speech(translated_text, output_code)
90
- audio_bytes = open(audio_file, "rb").read()
91
- st.audio(audio_bytes, format="audio/mp3")
 
 
92
 
93
- # Provide download link
94
- b64 = base64.b64encode(audio_bytes).decode()
95
- href = f'<a href="data:audio/mp3;base64,{b64}" download="translation.mp3">Download Translated Audio</a>'
96
- st.markdown(href, unsafe_allow_html=True)
97
- except Exception as e:
98
- st.error(f"Error: {e}")
 
1
  import streamlit as st
2
+ from pydub import AudioSegment
3
+ from pydub.playback import play
4
+ import whisper
5
  from gtts import gTTS
 
6
  import os
 
7
 
8
+ # Load Whisper model (open-source)
9
  @st.cache_resource
10
+ def load_model():
11
+ return whisper.load_model("base")
12
 
13
+ model = load_model()
 
14
 
15
+ # Supported language options
16
+ languages = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "English": "en",
18
  "French": "fr",
19
  "Spanish": "es",
20
  "German": "de",
21
+ "Chinese": "zh",
22
+ "Japanese": "ja",
23
+ "Korean": "ko",
24
+ "Hindi": "hi",
25
+ "Urdu": "ur"
26
  }
27
 
28
+ # App UI
29
+ st.title("Real-Time Voice Translator 🌍🎤")
30
+ st.markdown(
31
+ """
32
+ This application allows you to translate spoken words between multiple languages in real-time.
33
+ **Steps**:
34
+ 1. Choose input and output languages.
35
+ 2. Upload your audio file.
36
+ 3. Get the translated output and synthesized speech.
37
+ """
38
+ )
39
 
40
+ # Language selection
41
+ input_language = st.selectbox("Select Input Language:", list(languages.keys()))
42
+ output_language = st.selectbox("Select Output Language:", list(languages.keys()))
43
+
44
+ # Audio file upload
45
+ audio_file = st.file_uploader("Upload an audio file (in .wav format):", type=["wav"])
46
+
47
+ if audio_file:
48
+ # Load audio file
49
+ with open("temp_audio.wav", "wb") as f:
50
+ f.write(audio_file.read())
51
+ st.audio("temp_audio.wav", format="audio/wav", start_time=0)
52
+
53
+ # Transcribe audio using Whisper
54
+ st.write("Transcribing audio...")
55
+ audio = whisper.load_audio("temp_audio.wav")
56
+ audio = whisper.pad_or_trim(audio)
57
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
58
+ options = whisper.DecodingOptions(language=languages[input_language], fp16=False)
59
+ transcription = whisper.decode(model, mel, options).text
60
+
61
+ st.write(f"Transcribed Text: **{transcription}**")
62
+
63
+ # Translate text
64
+ st.write("Translating text...")
65
+ translated_text = model.transcribe("temp_audio.wav", task="translate", language=languages[output_language])["text"]
66
+ st.write(f"Translated Text: **{translated_text}**")
67
+
68
+ # Convert translated text to speech
69
+ st.write("Generating synthesized speech...")
70
+ tts = gTTS(text=translated_text, lang=languages[output_language])
71
+ tts.save("output_audio.mp3")
72
+
73
+ # Play output audio
74
+ st.audio("output_audio.mp3", format="audio/mp3", start_time=0)
75
+
76
+ # Remove temporary files
77
+ os.remove("temp_audio.wav")
78
+ os.remove("output_audio.mp3")
79
+
80
+ st.markdown("---")
81
+ st.write("Developed using open-source models and tools. 🚀")
82