Enerelt / app.py
nrlt's picture
Update app.py
e8fbca8 verified
import gradio as gr
import torch
import jiwer
import re
from transformers import pipeline
from deep_translator import GoogleTranslator
# Model identifier handed to the speech-recognition pipeline below.
model_id = "nrlt/whisper-small-mn-last2"

print("Модел ачаалж байна...")

# Build the ASR pipeline once at module load. Inference is pinned to the CPU
# and generation is fixed to language "mn" with the "transcribe" task.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    device="cpu",
    generate_kwargs=dict(language="mn", task="transcribe"),
)

# Translator configured with source "mn" and target "en".
translator = GoogleTranslator(source="mn", target="en")
def clean_text(text):
    """Normalize text before a word-level comparison.

    Lowercases the input, removes every character that is neither a word
    character nor whitespace, and trims surrounding whitespace.

    Args:
        text: The string to normalize. Any falsy value (``None``, "") is
            accepted.

    Returns:
        The normalized string, or "" when ``text`` is falsy.
    """
    if text:
        lowered = text.lower()
        without_punctuation = re.sub(r'[^\w\s]', '', lowered)
        return without_punctuation.strip()
    return ""
def process_speech(audio_path, reference_text):
    """Transcribe a recording, optionally score it, and translate it.

    Args:
        audio_path: Path of the recorded audio file; ``None`` or "" when
            nothing has been recorded.
        reference_text: Optional reference transcript. When it is non-blank,
            the word error rate of the recognized text against it is
            reported.

    Returns:
        A 3-tuple of strings ``(mn_text, wer_str, en_text)``: the recognized
        text, the WER as a percentage (or a placeholder message when no
        usable reference was given), and the English translation.

        If recognition or scoring fails, the first item carries the error
        message and the other two are "N/A". If only the translation fails,
        the recognized text and WER are still returned and the error message
        is placed in the translation slot.
    """
    if audio_path is None or audio_path == "":
        return " микрофоноор яриарай", "N/A", "N/A"

    # Errors are reported in the output boxes instead of being raised, so a
    # broad catch is used at this UI boundary.
    try:
        result = pipe(audio_path)
        mn_text = result["text"]

        wer_str = "Жишиг текст оруулаагүй"
        if reference_text and reference_text.strip():
            ref_clean = clean_text(reference_text)
            hyp_clean = clean_text(mn_text)
            # A reference that cleans down to nothing keeps the placeholder.
            if ref_clean:
                wer_score = jiwer.wer(ref_clean, hyp_clean)
                wer_str = f"{wer_score * 100:.1f}%"
    except Exception as e:
        return f" Алдаа: {str(e)}", "N/A", "N/A"

    # Nothing was recognized, so there is nothing to send to the translator.
    if not mn_text or not mn_text.strip():
        return mn_text, wer_str, ""

    # Translation is handled separately so that a translator failure does not
    # discard the transcript and WER that were already computed.
    try:
        en_text = translator.translate(mn_text)
    except Exception as e:
        en_text = f" Алдаа: {str(e)}"

    return mn_text, wer_str, en_text
def reset():
    """Return the blank values used to clear the interface.

    Returns:
        A 5-tuple: ``None`` followed by four empty strings.
    """
    empty_text_fields = ("",) * 4
    return (None, *empty_text_fields)
# Interface definition: a heading, then one row holding an input column and an
# output column, followed by the event wiring for the two buttons.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        value="""
# Монгол хэлний ASR болон Орчуулга
**Заавар:** Микрофоноор ярин зогсоосны дараа **"Таних"** товч дарна уу.
"""
    )

    with gr.Row():
        # First column: optional reference text, microphone recorder, buttons.
        with gr.Column():
            gr.Markdown(value="### 1. Оролт")

            ref_input = gr.Textbox(
                label=" Жишиг текст (заавал биш)",
                placeholder="Өнөөдөр цаг агаар маш сайхан байна.",
                lines=2,
            )

            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Яриад зогсооно уу",
                streaming=False,
            )

            with gr.Row():
                submit_btn = gr.Button(value="Таних & Орчуулах", variant="primary")
                reset_btn = gr.Button(value="Дахин эхлэх", variant="secondary")

        # Second column: transcript, WER and translation outputs.
        with gr.Column():
            gr.Markdown(value="### 2. Гаралт")

            mn_output = gr.Textbox(label="Таньсан текст", lines=2)
            wer_output = gr.Textbox(label="WER (Алдааны хувь)", lines=1)
            en_output = gr.Textbox(label="Англи орчуулга", lines=2)

    # Submit: run recognition on the recording plus the optional reference
    # text and fill the three output boxes.
    submit_btn.click(
        process_speech,
        [audio_input, ref_input],
        [mn_output, wer_output, en_output],
    )

    # Reset: clear the recorder, the reference text and all three outputs.
    reset_btn.click(
        reset,
        [],
        [audio_input, ref_input, mn_output, wer_output, en_output],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()