File size: 5,516 Bytes
8fab2a6
0e9f5f3
2fb073b
 
e91f600
 
0e9f5f3
2fb073b
0e9f5f3
8fab2a6
e91f600
 
 
2fb073b
 
 
e91f600
 
 
 
 
 
2fb073b
 
e91f600
 
 
 
 
 
2fb073b
625146d
e91f600
 
 
 
 
 
2fb073b
8fab2a6
 
2fb073b
 
 
8fab2a6
2fb073b
 
23b1388
2fb073b
 
 
 
 
 
 
 
e91f600
2fb073b
23b1388
e91f600
2fb073b
 
 
 
 
 
e91f600
 
 
 
 
 
 
 
 
 
 
8fab2a6
e91f600
 
 
 
625146d
e91f600
2fb073b
e91f600
2fb073b
e91f600
 
 
2fb073b
e91f600
2fb073b
e91f600
 
2fb073b
e91f600
2fb073b
e91f600
 
2fb073b
e91f600
 
 
2fb073b
e91f600
 
2fb073b
e91f600
625146d
e91f600
 
 
23b1388
e91f600
 
8fab2a6
e91f600
8fab2a6
e91f600
 
 
 
 
2fb073b
e91f600
 
625146d
e91f600
 
 
 
 
8fab2a6
 
 
625146d
 
 
 
 
2fb073b
8fab2a6
 
e91f600
 
 
625146d
2fb073b
 
625146d
2fb073b
 
e91f600
625146d
0e9f5f3
8fab2a6
 
e91f600
 
625146d
 
 
 
 
 
 
 
 
 
 
 
8fab2a6
 
 
e91f600
8fab2a6
 
e91f600
 
 
8fab2a6
 
 
e91f600
0e9f5f3
e91f600
8fab2a6
 
 
2fb073b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import os
import tempfile
import time
import uuid
from datetime import datetime

import gradio as gr
import torch
from transformers import pipeline
import ffmpeg

# -----------------------------
# Models
# -----------------------------
# Hugging Face model IDs for the two supported ASR checkpoints.
W2V2_MODEL = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
WHISPER_MODEL = "davidilag/whisper-large-no-is-fo-100h-30k-steps"

# UI label -> model ID. The labels are what the user sees in the
# checkbox group; the values feed transformers.pipeline(model=...).
MODEL_LABELS = {
    "Carlos (wav2vec2 - FO)": W2V2_MODEL,
    "Dávid (Whisper - NO/IS/FO)": WHISPER_MODEL,
}

PIPELINES = {}  # cache: label -> pipeline (loaded lazily on first use)


# -----------------------------
# Helpers
# -----------------------------
def get_asr_pipeline(model_label: str):
    """Return a cached ASR pipeline for *model_label*, loading it on first use.

    Runs on GPU device 0 when CUDA is available, otherwise on CPU (-1).
    """
    cached = PIPELINES.get(model_label)
    if cached is not None:
        return cached

    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_LABELS[model_label],
        device=0 if torch.cuda.is_available() else -1,
    )
    PIPELINES[model_label] = asr
    return asr


def to_16k_wav(input_path: str) -> str:
    """Transcode *input_path* to a mono 16 kHz WAV under /tmp.

    Returns the new file's path, or "" when the input path is empty or
    does not exist (ASR models here expect 16 kHz mono input).
    """
    if not (input_path and os.path.exists(input_path)):
        return ""

    out_path = f"/tmp/{uuid.uuid4().hex}_16k.wav"
    stream = ffmpeg.input(input_path)
    stream = stream.output(out_path, ac=1, ar=16000, format="wav")
    stream.overwrite_output().run(quiet=True)
    return out_path


def extract_audio_from_m3u8(url: str) -> str:
    """Pull the audio track from an m3u8 stream URL into a local file.

    The codec is copied as-is (no re-encode); returns the output path.
    """
    out_path = f"/tmp/{uuid.uuid4().hex}_m3u8.aac"
    stream = ffmpeg.input(url)
    stream = stream.output(out_path, acodec="copy")
    stream.overwrite_output().run(quiet=True)
    return out_path


def write_history_file(text: str) -> str:
    """Persist the transcription history to a temp file and return its path.

    Fix: use tempfile.gettempdir() instead of a hard-coded "/tmp" so the
    app also runs on platforms without /tmp (e.g. Windows). On Linux the
    resulting path is unchanged. A falsy *text* (None, "") writes "".
    """
    path = os.path.join(tempfile.gettempdir(), "talukennari_history.txt")
    with open(path, "w", encoding="utf-8") as f:
        f.write(text or "")
    return path


# -----------------------------
# Core Transcription
# -----------------------------
def transcribe(audio_path, state, m3u8_url, model_choices):
    """Run ASR with every selected model and append the results to history.

    Returns a 4-tuple (state, latest_markdown, history_text, history_file):
    the first and third items are the same accumulated history string, the
    second is the per-model result markdown, the fourth a downloadable .txt.
    Errors are caught and reported in the result slot instead of raising.
    """
    try:
        history = state or ""

        # Guard: at least one model must be selected.
        if not model_choices:
            return history, "Vel minst ein myndil.", history, write_history_file(history)

        # An m3u8 link, when provided, takes precedence over uploaded audio.
        url = str(m3u8_url).strip() if m3u8_url else ""
        if url:
            audio_path = extract_audio_from_m3u8(url)

        if not audio_path:
            return history, "Einki ljóð er til talukenning.", history, write_history_file(history)

        wav_path = to_16k_wav(audio_path)
        if not wav_path:
            return history, "Einki ljóð er til talukenning.", history, write_history_file(history)

        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        latest_blocks = []
        history_blocks = []

        for model_label in model_choices:
            asr = get_asr_pipeline(model_label)
            time.sleep(0.05)  # brief pause between model runs (kept from original)

            result = asr(wav_path, chunk_length_s=30)
            text = (result.get("text") or "").strip() or "(Eingin tekstur kom aftur.)"

            latest_blocks.append(f"### {model_label}\n{text}")
            history_blocks.append(f"[{stamp}] {model_label}\n{text}\n")

        latest_text = "\n\n".join(latest_blocks).strip()
        history = history + "\n".join(history_blocks).strip() + "\n\n"

        return history, latest_text, history, write_history_file(history)

    except Exception as e:  # UI boundary: surface the error instead of crashing
        history = state or ""
        err = f"Okkurt riggaði ikki í talukenningini: {type(e).__name__}: {e}"
        return history, err, history, write_history_file(history)


def reset_all():
    """Reset the UI: empty state, placeholder result panel, empty history, no file."""
    empty = ""
    placeholder = "### Úrslit (samanbering)\n—"
    return empty, placeholder, empty, None


# -----------------------------
# UI
# -----------------------------
with gr.Blocks() as demo:
    # Per-session accumulated transcription history (plain text).
    state_var = gr.State("")

    gr.Markdown(
        "## Talukennari\n"
        "Vel ein ella fleiri myndlar og samanber úrslitini. "
        "Teksturin verður goymdur undir **Tekstur** og kann takast niður sum .txt."
    )

    with gr.Row():
        # Left column: model selection + audio inputs.
        with gr.Column():
            model_choices = gr.CheckboxGroup(
                choices=list(MODEL_LABELS.keys()),
                value=["Carlos (wav2vec2 - FO)"],
                label="Vel ein ella fleiri myndlar",
            )

            # Microphone recording or uploaded audio file (path on disk).
            audio_in = gr.Audio(type="filepath", label="Mikrofon ella ljóðfíla")

            # Optional stream URL; when filled in it overrides audio_in
            # (see transcribe()).
            m3u8_url = gr.Textbox(
                label="m3u8-leinki (t.d. frá kvf.fo ella logting.fo)",
                placeholder="Lím m3u8 leinki her (valfrítt)",
            )

        # Right column: latest results + collapsible history/download.
        with gr.Column():
            latest_box = gr.Markdown("### Úrslit (samanbering)\n—")

            # Collapsible history + download
            with gr.Accordion("Tekstur", open=False):
                history_box = gr.Textbox(
                    label="",
                    lines=14,
                    interactive=False,
                    placeholder="Her kemur allur teksturin (søgan) at liggja…",
                )
                download_file = gr.File(
                    label="Tak niður tekst (.txt)",
                    interactive=False
                )

    with gr.Row():
        transcribe_button = gr.Button("Byrja talukenning")
        reset_button = gr.Button("Strika alt")

    # Wire the buttons: both update state, result panel, history box and
    # the downloadable file in one call.
    transcribe_button.click(
        transcribe,
        inputs=[audio_in, state_var, m3u8_url, model_choices],
        outputs=[state_var, latest_box, history_box, download_file],
    )

    reset_button.click(
        reset_all,
        inputs=[],
        outputs=[state_var, latest_box, history_box, download_file],
    )

# queue() serializes requests so long transcriptions don't block the UI thread.
demo.queue()
demo.launch()