"""Gradio Space: Icelandic ASR with a fine-tuned Whisper model (up to ~3 min audio)."""

import gc
import os

# Environment knobs must be set BEFORE torch (or anything importing torch)
# is loaded: keep CPU math single-threaded and cap the CUDA allocator's
# split size to reduce memory fragmentation on the shared Space GPU.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import gradio as gr
import spaces
import torch
from transformers import pipeline
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an uploaded audio file with palli23/whisper-small-sam_spjall.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by the ``gr.Audio(type="filepath")``
        component; ``None``/empty when nothing was uploaded.

    Returns
    -------
    str
        The transcription text, or an Icelandic prompt asking the user to
        upload a file.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    # ZeroGPU pattern: the GPU only exists for the duration of this call,
    # so the pipeline is built (and torn down) inside the function.
    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,
    )

    try:
        result = pipe(
            audio_path,
            chunk_length_s=30,
            batch_size=8,
            return_timestamps=False,
            generate_kwargs={
                # Deterministic beam-search decoding.  The original also
                # passed "temperature": 1.8, which is ignored (with a
                # warning) when sampling is off — removed as dead config.
                "num_beams": 5,
                "repetition_penalty": 1.2,
                "no_repeat_ngram_size": 3,
            },
        )
    finally:
        # Release the model and CUDA cache even if inference raises, so a
        # failed request does not leave the weights pinned on the shared GPU.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return result["text"]
# --- UI definition -------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    # Fixed "allt að 5 mín" -> "3 mín": the title, the function name, and
    # the 180 s @spaces.GPU slot all say 3 minutes.
    gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 3 mín hljóð")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    audio_in = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    output = gr.Textbox(lines=25, label="Útskrift")

    # Wire the button to the GPU-backed transcription function.
    btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
|
| | |
| | |
| | |
| | demo.launch( |
| | share=True, |
| | server_name="0.0.0.0", |
| | server_port=7860, |
| | auth=None |
| | ) |