| import gradio as gr |
| import torch |
| import jiwer |
| import re |
| from transformers import pipeline |
| from deep_translator import GoogleTranslator |
|
|
# Checkpoint identifier handed to the speech-recognition pipeline below.
model_id = "nrlt/whisper-small-mn-last2"
print("Модел ачаалж байна...")

# Use the first CUDA device when torch reports one; otherwise stay on the CPU,
# which was previously the only (hard-coded) option.
_device = "cuda:0" if torch.cuda.is_available() else "cpu"

# The pipeline is created once at import time and shared by every request.
# Generation is pinned to Mongolian ("mn") transcription.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    device=_device,
    generate_kwargs={"language": "mn", "task": "transcribe"},
)

# Shared Mongolian -> English translator used on each transcript.
translator = GoogleTranslator(source='mn', target='en')
|
|
def clean_text(text):
    """Normalise *text* so two strings can be compared word by word.

    The text is lower-cased, every character that is neither a word
    character nor whitespace is deleted, and surrounding whitespace is
    trimmed. Falsy input (``None`` or ``""``) yields an empty string.
    """
    if text:
        lowered = text.lower()
        # Delete punctuation/symbols outright (nothing is inserted in their place).
        without_symbols = re.sub(r'[^\w\s]', '', lowered)
        return without_symbols.strip()
    return ""
|
|
def _score_wer(reference_text, hypothesis_text):
    """Return the WER display string for a transcript against an optional reference."""
    wer_str = "Жишиг текст оруулаагүй"
    if reference_text and reference_text.strip():
        ref_clean = clean_text(reference_text)
        # A reference made only of punctuation cleans down to "", which
        # cannot be scored; the "no reference" message is kept in that case.
        if ref_clean:
            wer_score = jiwer.wer(ref_clean, clean_text(hypothesis_text))
            wer_str = f"{wer_score * 100:.1f}%"
    return wer_str


def _translate_to_english(mn_text):
    """Translate a transcript, reporting a failure as text instead of raising."""
    if not mn_text:
        # Nothing was recognised, so there is nothing to send to the translator.
        return ""
    try:
        return translator.translate(mn_text) or ""
    except Exception as e:
        # Translation goes through an external service; a failure there must
        # not discard the transcript and WER that were already computed.
        return f" Алдаа: {str(e)}"


def process_speech(audio_path, reference_text):
    """Transcribe a recording, score it against a reference, and translate it.

    Args:
        audio_path: Path of the recorded audio file, or ``None``/``""`` when
            nothing has been recorded.
        reference_text: Optional ground-truth text used to compute the word
            error rate; may be empty or ``None``.

    Returns:
        A ``(transcript, wer, translation)`` tuple of strings for the three
        output boxes. Without audio a prompt plus two ``"N/A"`` values is
        returned; if transcription or scoring fails, the error message takes
        the transcript slot and the other two are ``"N/A"``. If only the
        translation fails, the transcript and WER are still returned and the
        error message appears in the translation slot.
    """
    if not audio_path:
        return " микрофоноор яриарай", "N/A", "N/A"

    try:
        result = pipe(audio_path)
        # Trim surrounding whitespace so the displayed text and the
        # emptiness check in the translation step agree.
        mn_text = (result["text"] or "").strip()
        wer_str = _score_wer(reference_text, mn_text)
    except Exception as e:
        # UI boundary: show the failure in the output box rather than crash.
        return f" Алдаа: {str(e)}", "N/A", "N/A"

    return mn_text, wer_str, _translate_to_english(mn_text)
|
|
def reset():
    """Return the values that blank the UI: no audio, then four empty strings."""
    empty_text_fields = ("",) * 4
    return (None, *empty_text_fields)
|
|
# Build the two-column interface: inputs (reference text, microphone, buttons)
# on the left, outputs (transcript, WER, English translation) on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # The instruction quotes the submit button's actual label so users can
    # find the button it refers to.
    gr.Markdown("""
    # Монгол хэлний ASR болон Орчуулга
    **Заавар:** Микрофоноор ярин зогсоосны дараа **"Таних & Орчуулах"** товч дарна уу.
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 1. Оролт")
            ref_input = gr.Textbox(
                label=" Жишиг текст (заавал биш)",
                placeholder="Өнөөдөр цаг агаар маш сайхан байна.",
                lines=2
            )
            # type="filepath" hands process_speech a path to the recording.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Яриад зогсооно уу",
                streaming=False
            )
            with gr.Row():
                submit_btn = gr.Button("Таних & Орчуулах", variant="primary")
                reset_btn = gr.Button("Дахин эхлэх", variant="secondary")

        with gr.Column():
            gr.Markdown("### 2. Гаралт")
            mn_output = gr.Textbox(label="Таньсан текст", lines=2)
            wer_output = gr.Textbox(label="WER (Алдааны хувь)", lines=1)
            en_output = gr.Textbox(label="Англи орчуулга", lines=2)

    # The output order must match the tuple returned by process_speech:
    # (transcript, WER, translation).
    submit_btn.click(
        fn=process_speech,
        inputs=[audio_input, ref_input],
        outputs=[mn_output, wer_output, en_output]
    )

    # The output order must match the tuple returned by reset:
    # (audio, reference, transcript, WER, translation).
    reset_btn.click(
        fn=reset,
        inputs=[],
        outputs=[audio_input, ref_input, mn_output, wer_output, en_output]
    )


if __name__ == "__main__":
    demo.launch()