# NOTE: web-page residue captured together with the source; not part of the program.
# Spaces: Sleeping
# File size: 3,465 Bytes
# Commits: 6316a64 6b9b7f5 cf51174 02cc100
import streamlit as st
from streamlit_audiorec import st_audiorec
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
import time
import audio_recorder_streamlit as audio_recorder # Alternative package
# ------------------------------
# Setup models with progress indicators
# ------------------------------
@st.cache_resource
def load_models():
    """Build the speech-recognition and translation pipelines.

    A spinner is shown while each group of models is being created.

    Returns:
        A 3-tuple ``(stt_model, translator_en, translator_ur)``: the
        automatic-speech-recognition pipeline, followed by the translation
        pipelines created from the ``opus-mt-ps-en`` and ``opus-mt-ps-ur``
        checkpoints, in that order.
    """
    with st.spinner("Loading speech-to-text model..."):
        speech_recognizer = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
        )

    # English first, then Urdu -- the same creation order callers unpack.
    translation_checkpoints = (
        "Helsinki-NLP/opus-mt-ps-en",
        "Helsinki-NLP/opus-mt-ps-ur",
    )
    with st.spinner("Loading translation models..."):
        translators = [
            pipeline("translation", model=checkpoint)
            for checkpoint in translation_checkpoints
        ]

    return (speech_recognizer, *translators)
# ------------------------------
# Page configuration
# ------------------------------
# Streamlit documents that set_page_config must be the first Streamlit command
# of the script. load_models() renders spinners (and a failure renders
# st.error), so the page is configured before the models are loaded.
st.set_page_config(page_title="Pashto Voice Translator", page_icon="🎙️")

# Load the models; if that fails, report the problem and halt this script run,
# since nothing below can work without them.
try:
    stt_model, translator_en, translator_ur = load_models()
except Exception as e:
    st.error(f"Error loading models: {str(e)}")
    st.stop()

# ------------------------------
# Streamlit UI
# ------------------------------
st.title("🎙️ Pashto Voice Translator")
st.write("Speak in **Pashto** and get translation in **English** or **Urdu** with voice playback.")

# Language preference
target_lang = st.radio("Select output language:", ["English", "Urdu"])

st.write("Record your voice below:")

# Using audio_recorder_streamlit instead
# The handler below treats the result as "no recording yet" when it is None.
wav_audio_data = audio_recorder.audio_recorder(text="Click to record", icon_size="2x")
# Handle a finished recording: play it back, transcribe it, translate the
# transcription into the selected language and speak the translation.
if wav_audio_data is not None:
    # Local import so this block brings its own dependency into scope.
    from io import BytesIO

    st.audio(wav_audio_data, format="audio/wav")

    # The speech-to-text pipeline is called with a file path, so the recorded
    # bytes are written to a named temporary file. delete=False keeps the file
    # on disk after the handle is closed; it is removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        temp_wav.write(wav_audio_data)
        temp_filename = temp_wav.name

    try:
        # ------------------------------
        # Step 1: Speech to Text (Pashto)
        # ------------------------------
        try:
            with st.spinner("Transcribing speech..."):
                pashto_text = stt_model(temp_filename)["text"].strip()
        finally:
            # Single cleanup point: the file is only needed for transcription
            # and is removed whether or not transcription succeeded.
            if os.path.exists(temp_filename):
                os.unlink(temp_filename)

        if not pashto_text:
            # Nothing was transcribed, so there is nothing to translate or
            # speak; tell the user instead of failing in a later step.
            st.warning("No speech was recognized. Please try recording again.")
        else:
            st.subheader("Pashto transcription:")
            st.info(pashto_text)

            # ------------------------------
            # Step 2: Translation
            # ------------------------------
            translator = translator_en if target_lang == "English" else translator_ur
            with st.spinner("Translating..."):
                translated = translator(pashto_text)[0]["translation_text"]

            st.subheader(f"Translated ({target_lang}):")
            st.success(translated)

            # ------------------------------
            # Step 3: Text-to-Speech
            # ------------------------------
            with st.spinner("Generating audio..."):
                tts = gTTS(text=translated, lang="en" if target_lang == "English" else "ur")
                # Keep the generated audio in memory to avoid a second temp file.
                audio_bytes = BytesIO()
                tts.write_to_fp(audio_bytes)
                # Rewind so playback reads from the start of the buffer.
                audio_bytes.seek(0)

            st.audio(audio_bytes, format="audio/mp3")
    except Exception as e:
        # Top-level boundary of the app: show any failure from the steps above
        # in the page rather than letting the script crash.
        st.error(f"An error occurred: {str(e)}")