| |
|
|
| import os |
# Runtime tuning knobs, exported before torch is imported further down so the
# values are already in the environment when the native libraries initialise.
os.environ.update(
    {
        # Keep OpenMP-backed CPU work on a single thread.
        "OMP_NUM_THREADS": "1",
        # Option string for PyTorch's CUDA caching allocator.
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
    }
)
|
|
| import gradio as gr |
| import spaces |
| from transformers import pipeline |
| import torch |
| import gc |
|
|
| |
| |
| |
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an audio file with the Icelandic Whisper model.

    A fresh ASR pipeline is built on every call and torn down before the
    function returns, so no model state is kept between requests.

    Args:
        audio_path: Path to the audio file to transcribe (the Gradio audio
            input is configured to hand over a file path). A falsy value
            means nothing was uploaded.

    Returns:
        The transcribed text, or an Icelandic prompt asking the user to
        upload a file when ``audio_path`` is empty.

    Raises:
        Any exception raised while loading the model or running inference
        propagates to the caller; GPU cleanup still runs for inference
        failures.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,
    )

    try:
        result = pipe(
            audio_path,
            # Long recordings are processed in 30 s windows, 8 at a time.
            chunk_length_s=30,
            batch_size=8,
            return_timestamps=False,
            generate_kwargs={
                "num_beams": 5,
                "repetition_penalty": 1.2,
                "no_repeat_ngram_size": 3,
                "temperature": 0.0,
            },
        )
    finally:
        # Release the model and cached CUDA memory even when inference
        # fails, so a bad request does not leave GPU memory pinned.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return result["text"]
|
|
| |
| |
| |
# Page layout: three header lines, an audio upload, a trigger button and a
# text box that receives the transcript.
with gr.Blocks() as demo:
    for _header in (
        "# Íslenskt ASR – 3 mínútur",
        "**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð",
        "**Hafa samband:** pallinr1@protonmail.com",
    ):
        gr.Markdown(_header)

    # type="filepath" makes Gradio pass the upload to the handler as a path.
    audio_in = gr.Audio(label="Hlaðið upp .mp3 / .wav", type="filepath")
    btn = gr.Button("Transcribe", size="lg", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=25)

    # Clicking the button runs the transcription and shows the text result.
    btn.click(transcribe_3min, audio_in, output)
|
|
| |
| |
| |
# Start the web server: listen on all interfaces, port 7860, with a share
# link requested and no authentication configured.
_launch_options = {
    "share": True,
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "auth": None,
}
demo.launch(**_launch_options)