Harishkhawaja's picture
Create app.py
e7ad77d verified
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
import whisper
import tempfile
import base64
import os
from audiorecorder import audiorecorder # pip install streamlit-audiorecorder
# Load models once
@st.cache_resource
def load_models():
    """Load the speech-recognition and translation models used by the app.

    Wrapped in ``st.cache_resource`` so the models are loaded once rather
    than on every Streamlit rerun.

    Returns:
        A 3-tuple ``(whisper_model, tokenizer, translator)``: the Whisper
        ``"base"`` model, followed by the Marian tokenizer and Marian
        translation model, both loaded from the same Arabic-to-English
        checkpoint.
    """
    # Tokenizer and translation model must come from the same checkpoint.
    checkpoint = "Helsinki-NLP/opus-mt-ar-en"
    return (
        whisper.load_model("base"),
        MarianTokenizer.from_pretrained(checkpoint),
        MarianMTModel.from_pretrained(checkpoint),
    )
st.title("🎙️ Live Arabic Sermon Translator")
st.markdown("Click the mic, say something in Arabic, and wait a few seconds for translation.")

# Record audio: the widget returns the captured clip (empty until the user
# has recorded something).
audio = audiorecorder("Start Recording", "Stop Recording")

if len(audio) > 0:
    # NOTE(review): assumes `audio` exposes `.tobytes()` yielding WAV-encoded
    # bytes; confirm against the pinned streamlit-audiorecorder version.
    audio_bytes = audio.tobytes()
    st.audio(audio_bytes, format="audio/wav")

    # Whisper's transcribe() is given a file path, so persist the clip to a
    # temporary file. delete=False keeps it on disk after the handle closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        f.write(audio_bytes)
        temp_wav_path = f.name

    try:
        st.info("Transcribing Arabic...")
        whisper_model, tokenizer, translator = load_models()
        transcription = whisper_model.transcribe(temp_wav_path, language="ar")
        arabic_text = transcription["text"]

        st.markdown("### Arabic")
        st.write(arabic_text)

        if not arabic_text.strip():
            # Nothing was recognised; translating an empty string would only
            # produce meaningless output.
            st.warning("No speech was detected in the recording.")
        else:
            st.info("Translating to English...")
            # truncation=True caps the input at the tokenizer's maximum
            # length so a long transcription cannot overflow the model.
            tokens = tokenizer(
                arabic_text,
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            output = translator.generate(**tokens)
            english_text = tokenizer.decode(output[0], skip_special_tokens=True)

            st.markdown("### English")
            st.success(english_text)
    finally:
        # Always remove the temporary recording, even if transcription or
        # translation raised.
        os.remove(temp_wav_path)