import gradio as gr
import time
import os
import uuid
from datetime import datetime
import torch
from transformers import pipeline
import ffmpeg
# -----------------------------
# Models
# -----------------------------
# Hugging Face Hub model ids for the two ASR systems offered in the UI.
W2V2_MODEL = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
WHISPER_MODEL = "davidilag/whisper-large-no-is-fo-100h-30k-steps"
# Maps the human-readable label shown in the CheckboxGroup to the model id
# passed to transformers.pipeline().
MODEL_LABELS = {
    "Carlos (wav2vec2 - FO)": W2V2_MODEL,
    "Dávid (Whisper - NO/IS/FO)": WHISPER_MODEL,
}
PIPELINES = {}  # cache: label -> loaded ASR pipeline (models load lazily on first use)
# -----------------------------
# Helpers
# -----------------------------
def get_asr_pipeline(model_label: str):
    """Return a cached ASR pipeline for *model_label*, creating it on first use.

    The pipeline is placed on GPU 0 when CUDA is available, otherwise on CPU.
    Created pipelines are memoized in the module-level PIPELINES dict so each
    model is loaded at most once per process.
    """
    cached = PIPELINES.get(model_label)
    if cached is not None:
        return cached
    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_LABELS[model_label],
        device=0 if torch.cuda.is_available() else -1,
    )
    PIPELINES[model_label] = asr
    return asr
def to_16k_wav(input_path: str) -> str:
    """Convert *input_path* to a mono 16 kHz WAV under /tmp via ffmpeg.

    Returns the path of the converted file, or "" when *input_path* is empty
    or does not exist (the caller treats "" as "no audio available").
    """
    if not (input_path and os.path.exists(input_path)):
        return ""
    converted = f"/tmp/{uuid.uuid4().hex}_16k.wav"
    # ac=1 -> mono, ar=16000 -> 16 kHz sample rate expected by the ASR models.
    job = ffmpeg.input(input_path).output(converted, ac=1, ar=16000, format="wav")
    job.overwrite_output().run(quiet=True)
    return converted
def extract_audio_from_m3u8(url: str) -> str:
    """Download the audio track of an m3u8 stream into a /tmp .aac file.

    The audio is stream-copied (acodec="copy", no re-encode); returns the
    path of the file that was written.
    """
    target = f"/tmp/{uuid.uuid4().hex}_m3u8.aac"
    job = ffmpeg.input(url).output(target, acodec="copy")
    job.overwrite_output().run(quiet=True)
    return target
def write_history_file(text: str) -> str:
    """Persist *text* to the fixed history file and return that file's path.

    A falsy *text* (None, "") is written as an empty string, so the download
    link always points at a valid file.
    """
    path = "/tmp/talukennari_history.txt"
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(text if text else "")
    return path
# -----------------------------
# Core Transcription
# -----------------------------
def _early_return(state: str, message: str):
    """Build the 4-tuple of Gradio outputs for an early exit or error."""
    return state, message, state, write_history_file(state)

def transcribe(audio_path, state, m3u8_url, model_choices):
    """Run each selected ASR model over the given audio and append results.

    Parameters
    ----------
    audio_path : str | None
        Path of the uploaded/recorded audio file (Gradio filepath), ignored
        when an m3u8 URL is supplied.
    state : str | None
        Accumulated transcription history so far.
    m3u8_url : str | None
        Optional m3u8 stream URL; when non-blank it overrides *audio_path*.
    model_choices : list[str]
        Labels from MODEL_LABELS chosen in the UI.

    Returns
    -------
    tuple
        (new_state, latest_markdown, history_text, history_file_path) —
        the four Gradio outputs. Errors are reported through the latest
        text rather than raised, so the UI never crashes.
    """
    try:
        state = state or ""
        if not model_choices:
            return _early_return(state, "Vel minst ein myndil.")
        if m3u8_url and str(m3u8_url).strip():
            audio_path = extract_audio_from_m3u8(str(m3u8_url).strip())
        if not audio_path:
            return _early_return(state, "Einki ljóð er til talukenning.")
        wav_path = to_16k_wav(audio_path)
        if not wav_path:
            return _early_return(state, "Einki ljóð er til talukenning.")
        # NOTE(review): wav_path (and any extracted .aac) are never deleted,
        # so /tmp grows with each request — consider cleanup in a finally.
        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        latest_blocks = []
        history_blocks = []
        for model_label in model_choices:
            p = get_asr_pipeline(model_label)
            # chunk_length_s=30 lets the pipeline handle audio longer than
            # the models' native window by chunking.
            result = p(wav_path, chunk_length_s=30)
            text = (result.get("text") or "").strip() or "(Eingin tekstur kom aftur.)"
            latest_blocks.append(f"### {model_label}\n{text}")
            history_blocks.append(f"[{stamp}] {model_label}\n{text}\n")
        latest_text = "\n\n".join(latest_blocks).strip()
        history_entry = "\n".join(history_blocks).strip() + "\n\n"
        state = state + history_entry
        return state, latest_text, state, write_history_file(state)
    except Exception as e:
        # Top-level boundary for the Gradio callback: surface the error in
        # the UI instead of letting the request fail.
        state = state or ""
        err = f"Okkurt riggaði ikki í talukenningini: {type(e).__name__}: {e}"
        return _early_return(state, err)
def reset_all():
    """Reset every UI output: state, result panel, history box, download file."""
    cleared = ""
    placeholder = "### Úrslit (samanbering)\n—"
    return cleared, placeholder, cleared, None
# -----------------------------
# UI
# -----------------------------
# Gradio renders components in the order they are created inside the Blocks
# context, so statement order here defines the page layout.
with gr.Blocks() as demo:
    # Accumulated transcription history, threaded through every callback.
    state_var = gr.State("")
    gr.Markdown(
        "## Talukennari\n"
        "Vel ein ella fleiri myndlar og samanber úrslitini. "
        "Teksturin verður goymdur undir **Tekstur** og kann takast niður sum .txt."
    )
    with gr.Row():
        with gr.Column():
            # One checkbox per MODEL_LABELS entry; wav2vec2 is preselected.
            model_choices = gr.CheckboxGroup(
                choices=list(MODEL_LABELS.keys()),
                value=["Carlos (wav2vec2 - FO)"],
                label="Vel ein ella fleiri myndlar",
            )
            # Microphone recording or uploaded file; yields a filepath.
            audio_in = gr.Audio(type="filepath", label="Mikrofon ella ljóðfíla")
            # Optional stream URL; when filled in it overrides audio_in.
            m3u8_url = gr.Textbox(
                label="m3u8-leinki (t.d. frá kvf.fo ella logting.fo)",
                placeholder="Lím m3u8 leinki her (valfrítt)",
            )
        with gr.Column():
            # Latest per-model results, rendered as markdown sections.
            latest_box = gr.Markdown("### Úrslit (samanbering)\n—")
    # Collapsible full history plus a .txt download of the same text.
    with gr.Accordion("Tekstur", open=False):
        history_box = gr.Textbox(
            label="",
            lines=14,
            interactive=False,
            placeholder="Her kemur allur teksturin (søgan) at liggja…",
        )
        download_file = gr.File(
            label="Tak niður tekst (.txt)",
            interactive=False
        )
    with gr.Row():
        transcribe_button = gr.Button("Byrja talukenning")
        reset_button = gr.Button("Strika alt")
    # transcribe returns (state, latest_md, history_text, file_path) — the
    # output list below must stay in that order.
    transcribe_button.click(
        transcribe,
        inputs=[audio_in, state_var, m3u8_url, model_choices],
        outputs=[state_var, latest_box, history_box, download_file],
    )
    reset_button.click(
        reset_all,
        inputs=[],
        outputs=[state_var, latest_box, history_box, download_file],
    )
demo.queue()
demo.launch()