File size: 5,516 Bytes
8fab2a6
0e9f5f3
2fb073b
 
e91f600
 
0e9f5f3
2fb073b
0e9f5f3
8fab2a6
e91f600
 
 
2fb073b
 
 
e91f600
 
 
 
 
 
2fb073b
 
e91f600
 
 
 
 
 
2fb073b
625146d
e91f600
 
 
 
 
 
2fb073b
8fab2a6
 
2fb073b
 
 
8fab2a6
2fb073b
 
23b1388
2fb073b
 
 
 
 
 
 
 
e91f600
2fb073b
23b1388
e91f600
2fb073b
 
 
 
 
 
e91f600
 
 
 
 
 
 
 
 
 
 
8fab2a6
e91f600
 
 
 
625146d
e91f600
2fb073b
e91f600
2fb073b
e91f600
 
 
2fb073b
e91f600
2fb073b
e91f600
 
2fb073b
e91f600
2fb073b
e91f600
 
2fb073b
e91f600
 
 
2fb073b
e91f600
 
2fb073b
e91f600
625146d
e91f600
 
 
23b1388
e91f600
 
8fab2a6
e91f600
8fab2a6
e91f600
 
 
 
 
2fb073b
e91f600
 
625146d
e91f600
 
 
 
 
8fab2a6
 
 
625146d
 
 
 
 
2fb073b
8fab2a6
 
e91f600
 
 
625146d
2fb073b
 
625146d
2fb073b
 
e91f600
625146d
0e9f5f3
8fab2a6
 
e91f600
 
625146d
 
 
 
 
 
 
 
 
 
 
 
8fab2a6
 
 
e91f600
8fab2a6
 
e91f600
 
 
8fab2a6
 
 
e91f600
0e9f5f3
e91f600
8fab2a6
 
 
2fb073b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import os
import tempfile
import time
import uuid
from datetime import datetime

import gradio as gr
import torch
from transformers import pipeline
import ffmpeg

# -----------------------------
# Models
# -----------------------------
# Hugging Face model IDs for the two supported ASR checkpoints.
W2V2_MODEL = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
WHISPER_MODEL = "davidilag/whisper-large-no-is-fo-100h-30k-steps"

# UI label -> model ID. The labels are what the user sees in the
# checkbox group; the values feed transformers.pipeline(model=...).
MODEL_LABELS = {
    "Carlos (wav2vec2 - FO)": W2V2_MODEL,
    "Dávid (Whisper - NO/IS/FO)": WHISPER_MODEL,
}

PIPELINES = {}  # cache: label -> pipeline (loaded lazily on first use)


# -----------------------------
# Helpers
# -----------------------------
def get_asr_pipeline(model_label: str):
    """Return a cached ASR pipeline for *model_label*, loading it on first use.

    Runs on GPU device 0 when CUDA is available, otherwise on CPU (-1).
    """
    cached = PIPELINES.get(model_label)
    if cached is not None:
        return cached

    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_LABELS[model_label],
        device=0 if torch.cuda.is_available() else -1,
    )
    PIPELINES[model_label] = asr
    return asr


def to_16k_wav(input_path: str) -> str:
    """Transcode *input_path* to a mono 16 kHz WAV under /tmp.

    Returns the new file's path, or "" when the input path is empty or
    does not exist (ASR models here expect 16 kHz mono input).
    """
    if not (input_path and os.path.exists(input_path)):
        return ""

    out_path = f"/tmp/{uuid.uuid4().hex}_16k.wav"
    stream = ffmpeg.input(input_path)
    stream = stream.output(out_path, ac=1, ar=16000, format="wav")
    stream.overwrite_output().run(quiet=True)
    return out_path


def extract_audio_from_m3u8(url: str) -> str:
    """Pull the audio track from an m3u8 stream URL into a local file.

    The codec is copied as-is (no re-encode); returns the output path.
    """
    out_path = f"/tmp/{uuid.uuid4().hex}_m3u8.aac"
    stream = ffmpeg.input(url)
    stream = stream.output(out_path, acodec="copy")
    stream.overwrite_output().run(quiet=True)
    return out_path


def write_history_file(text: str) -> str:
    """Persist the transcription history to a temp file and return its path.

    Fix: use tempfile.gettempdir() instead of a hard-coded "/tmp" so the
    app also runs on platforms without /tmp (e.g. Windows). On Linux the
    resulting path is unchanged. A falsy *text* (None, "") writes "".
    """
    path = os.path.join(tempfile.gettempdir(), "talukennari_history.txt")
    with open(path, "w", encoding="utf-8") as f:
        f.write(text or "")
    return path


# -----------------------------
# Core Transcription
# -----------------------------
def transcribe(audio_path, state, m3u8_url, model_choices):
    """Run ASR with every selected model and append the results to history.

    Returns a 4-tuple (state, latest_markdown, history_text, history_file):
    the first and third items are the same accumulated history string, the
    second is the per-model result markdown, the fourth a downloadable .txt.
    Errors are caught and reported in the result slot instead of raising.
    """
    try:
        history = state or ""

        # Guard: at least one model must be selected.
        if not model_choices:
            return history, "Vel minst ein myndil.", history, write_history_file(history)

        # An m3u8 link, when provided, takes precedence over uploaded audio.
        url = str(m3u8_url).strip() if m3u8_url else ""
        if url:
            audio_path = extract_audio_from_m3u8(url)

        if not audio_path:
            return history, "Einki ljóð er til talukenning.", history, write_history_file(history)

        wav_path = to_16k_wav(audio_path)
        if not wav_path:
            return history, "Einki ljóð er til talukenning.", history, write_history_file(history)

        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        latest_blocks = []
        history_blocks = []

        for model_label in model_choices:
            asr = get_asr_pipeline(model_label)
            time.sleep(0.05)  # brief pause between model runs (kept from original)

            result = asr(wav_path, chunk_length_s=30)
            text = (result.get("text") or "").strip() or "(Eingin tekstur kom aftur.)"

            latest_blocks.append(f"### {model_label}\n{text}")
            history_blocks.append(f"[{stamp}] {model_label}\n{text}\n")

        latest_text = "\n\n".join(latest_blocks).strip()
        history = history + "\n".join(history_blocks).strip() + "\n\n"

        return history, latest_text, history, write_history_file(history)

    except Exception as e:  # UI boundary: surface the error instead of crashing
        history = state or ""
        err = f"Okkurt riggaði ikki í talukenningini: {type(e).__name__}: {e}"
        return history, err, history, write_history_file(history)


def reset_all():
    """Reset the UI: empty state, placeholder result panel, empty history, no file."""
    empty = ""
    placeholder = "### Úrslit (samanbering)\n—"
    return empty, placeholder, empty, None


# -----------------------------
# UI
# -----------------------------
with gr.Blocks() as demo:
    # Per-session accumulated transcription history (plain text).
    state_var = gr.State("")

    gr.Markdown(
        "## Talukennari\n"
        "Vel ein ella fleiri myndlar og samanber úrslitini. "
        "Teksturin verður goymdur undir **Tekstur** og kann takast niður sum .txt."
    )

    with gr.Row():
        # Left column: model selection + audio inputs.
        with gr.Column():
            model_choices = gr.CheckboxGroup(
                choices=list(MODEL_LABELS.keys()),
                value=["Carlos (wav2vec2 - FO)"],
                label="Vel ein ella fleiri myndlar",
            )

            # Microphone recording or uploaded audio file (path on disk).
            audio_in = gr.Audio(type="filepath", label="Mikrofon ella ljóðfíla")

            # Optional stream URL; when filled in it overrides audio_in
            # (see transcribe()).
            m3u8_url = gr.Textbox(
                label="m3u8-leinki (t.d. frá kvf.fo ella logting.fo)",
                placeholder="Lím m3u8 leinki her (valfrítt)",
            )

        # Right column: latest results + collapsible history/download.
        with gr.Column():
            latest_box = gr.Markdown("### Úrslit (samanbering)\n—")

            # Collapsible history + download
            with gr.Accordion("Tekstur", open=False):
                history_box = gr.Textbox(
                    label="",
                    lines=14,
                    interactive=False,
                    placeholder="Her kemur allur teksturin (søgan) at liggja…",
                )
                download_file = gr.File(
                    label="Tak niður tekst (.txt)",
                    interactive=False
                )

    with gr.Row():
        transcribe_button = gr.Button("Byrja talukenning")
        reset_button = gr.Button("Strika alt")

    # Wire the buttons: both update state, result panel, history box and
    # the downloadable file in one call.
    transcribe_button.click(
        transcribe,
        inputs=[audio_in, state_var, m3u8_url, model_choices],
        outputs=[state_var, latest_box, history_box, download_file],
    )

    reset_button.click(
        reset_all,
        inputs=[],
        outputs=[state_var, latest_box, history_box, download_file],
    )

# queue() serializes requests so long transcriptions don't block the UI thread.
demo.queue()
demo.launch()