"""Gradio demo: Mongolian speech recognition (Whisper), WER scoring and MN->EN translation."""

import re
from typing import Optional, Tuple

import gradio as gr
import jiwer
import torch  # noqa: F401  (kept: backend required by the transformers pipeline)
from deep_translator import GoogleTranslator
from transformers import pipeline

model_id = "nrlt/whisper-small-mn-last2"

# Matches every character that is neither a word character nor whitespace.
# Compiled once because clean_text() runs on every request.
_PUNCTUATION_RE = re.compile(r"[^\w\s]")

print("Модел ачаалж байна...")
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    device="cpu",
    generate_kwargs={"language": "mn", "task": "transcribe"},
)

translator = GoogleTranslator(source="mn", target="en")


def clean_text(text: Optional[str]) -> str:
    """Normalize *text* for WER comparison.

    Lower-cases the text, removes every character that is neither a word
    character nor whitespace, and strips surrounding whitespace.
    Returns an empty string for ``None`` or empty input.
    """
    if not text:
        return ""
    return _PUNCTUATION_RE.sub("", text.lower()).strip()


def _compute_wer(reference_text: Optional[str], hypothesis_text: str) -> str:
    """Return the WER of *hypothesis_text* against *reference_text* as a display string."""
    wer_str = "Жишиг текст оруулаагүй"
    if not (reference_text and reference_text.strip()):
        return wer_str

    ref_clean = clean_text(reference_text)
    if not ref_clean:
        # The reference contained only punctuation; there is nothing to score.
        return wer_str

    try:
        wer_score = jiwer.wer(ref_clean, clean_text(hypothesis_text))
    except ValueError:
        # jiwer rejects inputs it cannot align; do not lose the transcription.
        return "N/A"
    return f"{wer_score * 100:.1f}%"


def _translate(mn_text: str) -> str:
    """Translate *mn_text* to English, returning an error message on failure."""
    if not mn_text:
        # Nothing was recognized, so there is nothing to send to the translator.
        return ""
    try:
        return translator.translate(mn_text) or ""
    except Exception as exc:  # UI boundary: network/service errors must not crash the app
        return f"Орчуулгын алдаа: {exc}"


def process_speech(
    audio_path: Optional[str], reference_text: Optional[str]
) -> Tuple[str, str, str]:
    """Transcribe a recording, score it against a reference and translate it.

    Args:
        audio_path: Path of the recorded audio file, or ``None``/"" when
            nothing was recorded.
        reference_text: Optional reference transcript used to compute WER.

    Returns:
        A ``(mongolian_text, wer, english_text)`` tuple of display strings.
        When transcription fails the first element carries the error message
        and the other two are ``"N/A"``. A translation failure only affects
        the third element, so the transcription and WER are still shown.
    """
    if not audio_path:
        return " микрофоноор яриарай", "N/A", "N/A"

    try:
        result = pipe(audio_path)
        # Whisper output frequently starts with a space; trim it for display.
        mn_text = (result["text"] or "").strip()
    except Exception as exc:  # UI boundary: report the failure instead of crashing
        return f" Алдаа: {exc}", "N/A", "N/A"

    wer_str = _compute_wer(reference_text, mn_text)
    en_text = _translate(mn_text)
    return mn_text, wer_str, en_text


def reset():
    """Return cleared values for the audio input and the four text boxes."""
    return None, "", "", "", ""


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Монгол хэлний ASR болон Орчуулга
        **Заавар:** Микрофоноор ярин зогсоосны дараа **"Таних"** товч дарна уу.
        """
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 1. Оролт")
            ref_input = gr.Textbox(
                label=" Жишиг текст (заавал биш)",
                placeholder="Өнөөдөр цаг агаар маш сайхан байна.",
                lines=2,
            )
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Яриад зогсооно уу",
                streaming=False,
            )
            with gr.Row():
                submit_btn = gr.Button("Таних & Орчуулах", variant="primary")
                reset_btn = gr.Button("Дахин эхлэх", variant="secondary")

        with gr.Column():
            gr.Markdown("### 2. Гаралт")
            mn_output = gr.Textbox(label="Таньсан текст", lines=2)
            wer_output = gr.Textbox(label="WER (Алдааны хувь)", lines=1)
            en_output = gr.Textbox(label="Англи орчуулга", lines=2)

    # Run recognition only on explicit click, once the recording is finished.
    submit_btn.click(
        fn=process_speech,
        inputs=[audio_input, ref_input],
        outputs=[mn_output, wer_output, en_output],
    )

    # Reset clears the recording, the reference text and every output box.
    reset_btn.click(
        fn=reset,
        inputs=[],
        outputs=[audio_input, ref_input, mn_output, wer_output, en_output],
    )


if __name__ == "__main__":
    demo.launch()