File size: 3,465 Bytes
6316a64
6b9b7f5
6316a64
 
 
 
6b9b7f5
cf51174
6316a64
6b9b7f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02cc100
6316a64
cf51174
 
02cc100
6b9b7f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import streamlit as st
from streamlit_audiorec import st_audiorec
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
import time
import audio_recorder_streamlit as audio_recorder  # Alternative package

# ------------------------------
# Setup models with progress indicators
# ------------------------------
@st.cache_resource
def load_models():
    """Create the speech-recognition and translation pipelines used by the app.

    The function is wrapped in ``st.cache_resource`` so the returned pipeline
    objects are meant to be built once and reused instead of being recreated
    every time the script runs.

    Returns:
        A 3-tuple ``(speech_to_text, pashto_to_english, pashto_to_urdu)``:
        an ``automatic-speech-recognition`` pipeline backed by
        ``openai/whisper-small`` followed by two ``translation`` pipelines
        backed by the ``Helsinki-NLP/opus-mt-ps-en`` and
        ``Helsinki-NLP/opus-mt-ps-ur`` checkpoints.

    Raises:
        Whatever ``transformers.pipeline`` raises when a checkpoint cannot be
        loaded; nothing is caught here, the caller decides how to report it.
    """
    # Each loading stage gets its own spinner so the user sees progress.
    with st.spinner("Loading speech-to-text model..."):
        speech_to_text = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
        )

    with st.spinner("Loading translation models..."):
        pashto_to_english = pipeline(
            "translation",
            model="Helsinki-NLP/opus-mt-ps-en",
        )
        pashto_to_urdu = pipeline(
            "translation",
            model="Helsinki-NLP/opus-mt-ps-ur",
        )

    return speech_to_text, pashto_to_english, pashto_to_urdu

from io import BytesIO  # stdlib: in-memory buffer that receives the synthesized MP3

# Maps each option of the output-language selector to its gTTS language code.
TTS_LANG_CODES = {"English": "en", "Urdu": "ur"}

# ------------------------------
# Streamlit UI
# ------------------------------
# Page configuration has to be issued before any other Streamlit command.
# load_models() below already emits commands (st.spinner, and st.error on
# failure), so the page must be configured first.
st.set_page_config(page_title="Pashto Voice Translator", page_icon="🎙️")

try:
    stt_model, translator_en, translator_ur = load_models()
except Exception as e:
    # Top-level boundary: without the models nothing else can work, so report
    # the problem in the page and halt this script run.
    st.error(f"Error loading models: {e}")
    st.stop()

st.title("🎙️ Pashto Voice Translator")
st.write("Speak in **Pashto** and get translation in **English** or **Urdu** with voice playback.")

# Language preference
target_lang = st.radio("Select output language:", list(TTS_LANG_CODES))

st.write("Record your voice below:")
# Using audio_recorder_streamlit instead
wav_audio_data = audio_recorder.audio_recorder(text="Click to record", icon_size="2x")

if wav_audio_data is not None:
    st.audio(wav_audio_data, format="audio/wav")

    # The speech-to-text pipeline is handed a file path, so the recorded bytes
    # are persisted to a named temporary file. delete=False keeps the file on
    # disk after the handle is closed; it is removed in the finally below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        temp_wav.write(wav_audio_data)
        temp_filename = temp_wav.name

    try:
        # ------------------------------
        # Step 1: Speech to Text (Pashto)
        # ------------------------------
        # NOTE(review): no language is passed to the Whisper pipeline here, so
        # it presumably auto-detects the spoken language — confirm whether
        # forcing Pashto would give better transcriptions.
        with st.spinner("Transcribing speech..."):
            pashto_text = stt_model(temp_filename)["text"].strip()

        if not pashto_text:
            # Silence or noise yields an empty transcription. Translating it
            # is pointless and gTTS refuses empty text, so stop here with a
            # clear message instead of surfacing a cryptic error.
            st.warning("No speech was detected. Please try recording again.")
        else:
            st.subheader("Pashto transcription:")
            st.info(pashto_text)

            # ------------------------------
            # Step 2: Translation
            # ------------------------------
            translator = translator_en if target_lang == "English" else translator_ur
            with st.spinner("Translating..."):
                translated = translator(pashto_text)[0]["translation_text"]

            st.subheader(f"Translated ({target_lang}):")
            st.success(translated)

            # ------------------------------
            # Step 3: Text-to-Speech
            # ------------------------------
            with st.spinner("Generating audio..."):
                tts = gTTS(text=translated, lang=TTS_LANG_CODES[target_lang])

                # Synthesize into memory so no second temporary file is needed.
                audio_bytes = BytesIO()
                tts.write_to_fp(audio_bytes)
                audio_bytes.seek(0)  # rewind so playback starts at the first byte

            st.audio(audio_bytes, format="audio/mp3")

    except Exception as e:
        # UI boundary: show the failure in the page rather than a stack trace.
        st.error(f"An error occurred: {e}")
    finally:
        # Single cleanup point for the temporary recording, reached on success,
        # on failure, and when Streamlit interrupts the script run.
        try:
            os.unlink(temp_filename)
        except FileNotFoundError:
            pass