| import gradio as gr |
| import torch |
| import os |
| import tempfile |
| import subprocess |
| from transformers import pipeline |
|
|
| |
# Load the Whisper model once at import time so every request reuses it.
# - device: use the first CUDA GPU when torch reports one, otherwise CPU (-1).
# - chunk_length_s: split long recordings into 30-second windows so inputs
#   longer than Whisper's native 30 s context are transcribed in full.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium",
    chunk_length_s=30,
    device=0 if torch.cuda.is_available() else -1,
)
|
|
def transcribe(audio_file):
    """Transcribe an audio file to text using the module-level ``pipe``.

    The input is first converted by ffmpeg into a 16 kHz mono WAV stored in a
    temporary file; that file is passed to ``pipe`` twice and the longer of
    the two transcripts is returned. The temporary file is always removed.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty string (e.g. when the audio component is cleared) is
            accepted and yields an empty transcript.

    Returns:
        The stripped transcript text, or ``""`` when no audio was supplied.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable is not installed.
    """
    # The UI fires this callback with None when the audio input is cleared.
    if not audio_file:
        return ""

    # Reserve a unique .wav path; close the handle before ffmpeg writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp_path = tmp.name

    try:
        try:
            # "-y" is a global option, so it goes before the inputs/outputs;
            # it is required because the temporary file already exists.
            subprocess.run(
                ["ffmpeg", "-y", "-i", audio_file, "-ar", "16000", "-ac", "1", tmp_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            detail = err.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(
                f"ffmpeg failed to convert {audio_file!r}: {detail}"
            ) from err

        # Run the model twice and keep the longer transcript.
        texts = [pipe(tmp_path)["text"].strip() for _ in range(2)]
        return max(texts, key=len)
    finally:
        # Clean up even when conversion or inference raised.
        os.remove(tmp_path)
|
|
| |
# Build the web UI: an audio input (upload or microphone), a textbox showing
# the transcript, and a button that copies the transcript to the clipboard.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ تبدیل صوت به متن با دقت بالا")
    gr.Markdown("هر فایل صوتی رو آپلود کن، متن دقیق استخراج میشه 👇")

    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="فایل صوتی")

    output_text = gr.Textbox(label="متن استخراجشده", lines=8)
    btn_copy = gr.Button("📋 کپی متن")

    # Re-run transcription whenever the audio value changes.
    audio_input.change(fn=transcribe, inputs=audio_input, outputs=output_text)

    # Copying must happen in the browser, so this handler is pure JavaScript:
    # it receives the textbox value and writes it to the system clipboard.
    btn_copy.click(
        fn=None,
        inputs=output_text,
        outputs=None,
        js="(text) => { navigator.clipboard.writeText(text); }",
    )


demo.launch()
|
|