|
|
|
|
|
|
|
|
import os |
|
|
# Runtime tuning via environment variables. These are set ahead of the
# torch / transformers imports further down -- presumably so the libraries
# pick the values up when they initialise (confirm before reordering).
os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
    }
)
|
|
|
|
|
import gradio as gr |
|
|
import spaces |
|
|
from transformers import pipeline |
|
|
import torch |
|
|
import gc |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an audio file with the fine-tuned Whisper model.

    A fresh ASR pipeline is built on CUDA device 0 for every call and is
    released again before the function returns, whether or not inference
    succeeded.

    Args:
        audio_path: Path to the audio file to transcribe. A falsy value
            (``None`` or an empty string) means nothing was uploaded.

    Returns:
        The transcribed text, or the Icelandic prompt
        "Hlaðið upp hljóðskrá" when no audio file was supplied.

    Raises:
        Whatever ``pipeline(...)`` or the pipeline call raises; errors are
        not swallowed here, only the GPU cleanup is guaranteed.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,
    )

    try:
        result = pipe(
            audio_path,
            # The audio is processed in 30-second chunks, 8 per batch.
            chunk_length_s=30,
            batch_size=8,
            return_timestamps=False,
            generate_kwargs={
                "num_beams": 5,
                "repetition_penalty": 1.2,
                "no_repeat_ngram_size": 3,
                "temperature": 0.0,
            },
        )
    finally:
        # Drop the model and hand cached CUDA memory back even when
        # inference fails, so a bad upload does not leave the model
        # resident on the GPU.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return result["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the single-page UI: three header lines, an audio upload widget,
# a trigger button and a text box for the transcript.
with gr.Blocks() as demo:
    # NOTE(review): the heading says 3 minutes while the subtitle says
    # "up to 5 min audio" -- confirm which limit is intended.
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    # type="filepath" makes the component pass a path on disk to the handler.
    audio_in = gr.Audio(label="Hlaðið upp .mp3 / .wav", type="filepath")
    btn = gr.Button("Transcribe", size="lg", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=25)

    # Clicking the button runs the transcription and shows its return value.
    btn.click(
        fn=transcribe_3min,
        inputs=[audio_in],
        outputs=[output],
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Server start-up options, collected in one place and handed to Gradio.
_launch_options = {
    "share": True,  # ask Gradio for a public share link
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "auth": None,  # no login is configured
}
demo.launch(**_launch_options)