"""Streamlit app: record Pashto speech, transcribe it, translate it, and play it back."""

import contextlib
import os
import tempfile
import time
from io import BytesIO

import audio_recorder_streamlit as audio_recorder  # Alternative package
import streamlit as st
from gtts import gTTS
from streamlit_audiorec import st_audiorec
from transformers import pipeline

# ------------------------------
# Page configuration
# ------------------------------
# Kept ahead of every other Streamlit call: set_page_config is documented as
# needing to be the first Streamlit command on a page, and load_models()
# below already issues commands (st.spinner).
st.set_page_config(page_title="Pashto Voice Translator", page_icon="🎙️")

# gTTS language code for each output language offered in the UI.
_TTS_LANG_CODES = {"English": "en", "Urdu": "ur"}


# ------------------------------
# Setup models with progress indicators
# ------------------------------
@st.cache_resource
def load_models():
    """Build the speech-to-text and translation pipelines.

    Decorated with ``st.cache_resource`` so the pipelines are reused rather
    than rebuilt on every script rerun.

    Returns:
        A ``(stt_model, translator_en, translator_ur)`` tuple of
        ``transformers`` pipelines.
    """
    with st.spinner("Loading speech-to-text model..."):
        stt_model = pipeline(
            "automatic-speech-recognition", model="openai/whisper-small"
        )
    with st.spinner("Loading translation models..."):
        translator_en = pipeline("translation", model="Helsinki-NLP/opus-mt-ps-en")
        translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-ps-ur")
    return stt_model, translator_en, translator_ur


try:
    stt_model, translator_en, translator_ur = load_models()
except Exception as e:
    # Top-level boundary: report the failure in the UI and halt this run.
    st.error(f"Error loading models: {e}")
    st.stop()


def _transcribe(wav_bytes: bytes) -> str:
    """Write *wav_bytes* to a temporary .wav file and return its transcription.

    The temporary file is removed whether or not transcription succeeds.
    """
    temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        # Close the handle before the model opens the path for reading.
        with temp_wav:
            temp_wav.write(wav_bytes)
        return stt_model(temp_wav.name)["text"]
    finally:
        # Best-effort cleanup; a failed unlink must not mask the real error.
        with contextlib.suppress(OSError):
            os.unlink(temp_wav.name)


def _translate(text: str, language: str) -> str:
    """Translate Pashto *text* into *language* ("English", otherwise Urdu)."""
    translator = translator_en if language == "English" else translator_ur
    return translator(text)[0]["translation_text"]


def _synthesize(text: str, language: str) -> BytesIO:
    """Return an in-memory MP3 of *text* spoken in *language*."""
    tts = gTTS(text=text, lang=_TTS_LANG_CODES.get(language, "ur"))
    # An in-memory buffer avoids a second temporary file.
    audio_buffer = BytesIO()
    tts.write_to_fp(audio_buffer)
    audio_buffer.seek(0)
    return audio_buffer


# ------------------------------
# Streamlit UI
# ------------------------------
st.title("🎙️ Pashto Voice Translator")
st.write(
    "Speak in **Pashto** and get translation in **English** or **Urdu** with voice playback."
)

# Language preference
target_lang = st.radio("Select output language:", ["English", "Urdu"])

st.write("Record your voice below:")

# Using audio_recorder_streamlit instead
wav_audio_data = audio_recorder.audio_recorder(text="Click to record", icon_size="2x")

if wav_audio_data is not None:
    st.audio(wav_audio_data, format="audio/wav")

    try:
        # ------------------------------
        # Step 1: Speech to Text (Pashto)
        # ------------------------------
        with st.spinner("Transcribing speech..."):
            pashto_text = _transcribe(wav_audio_data)

        st.subheader("Pashto transcription:")
        st.info(pashto_text)

        if not pashto_text.strip():
            # Nothing to translate or speak; gTTS rejects empty text, so stop
            # here with a clear message instead of a generic error.
            st.warning("No speech was recognised. Please try recording again.")
        else:
            # ------------------------------
            # Step 2: Translation
            # ------------------------------
            with st.spinner("Translating..."):
                translated = _translate(pashto_text, target_lang)

            st.subheader(f"Translated ({target_lang}):")
            st.success(translated)

            # ------------------------------
            # Step 3: Text-to-Speech
            # ------------------------------
            with st.spinner("Generating audio..."):
                audio_bytes = _synthesize(translated, target_lang)

            st.audio(audio_bytes, format="audio/mp3")

    except Exception as e:
        # Top-level UI boundary: show the failure instead of crashing the app.
        st.error(f"An error occurred: {e}")