File size: 3,591 Bytes
96ec5c3
2e9f41b
1db40b9
2e9f41b
96ec5c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1db40b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e9f41b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import json
import os
import subprocess
import tempfile
from pathlib import Path
from typing import Any

import gradio as gr


def serialize(value: Any) -> Any:
    """Recursively convert ``value`` into plain, JSON-friendly Python data.

    Conversion rules, tried in this order:

    * ``None``, ``str``, ``int``, ``float`` and ``bool`` are returned as-is.
    * ``Path`` objects become their string form.
    * Dicts are rebuilt with stringified keys and serialized values.
    * Lists and tuples become lists of serialized items.
    * Objects exposing ``item()`` or ``tolist()`` are converted by calling
      that method; a failing call is ignored and the next rule is tried.
    * Objects with a ``__dict__`` become a dict of their serialized
      attributes.
    * Anything else is rendered with ``str()``.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value

    if isinstance(value, Path):
        return str(value)

    if isinstance(value, dict):
        converted: dict[str, Any] = {}
        for key, item in value.items():
            converted[str(key)] = serialize(item)
        return converted

    if isinstance(value, (list, tuple)):
        return list(map(serialize, value))

    # Best-effort unwrapping: prefer ``item()``, then fall back to
    # ``tolist()``. Any failure simply moves on to the next strategy.
    for method_name in ("item", "tolist"):
        if hasattr(value, method_name):
            try:
                return getattr(value, method_name)()
            except Exception:
                pass

    if hasattr(value, "__dict__"):
        attributes = vars(value)
        return {name: serialize(attr) for name, attr in attributes.items()}

    return str(value)


def parse_model_options(raw: str | None) -> dict[str, Any]:
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise gr.Error(f"model_options_json must be valid JSON: {exc}") from exc
    if not isinstance(parsed, dict):
        raise gr.Error("model_options_json must decode to a JSON object")
    return parsed


def get_audio_duration_seconds(audio_file: str) -> float | None:
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        audio_file,
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        return None
    try:
        return float(proc.stdout.strip())
    except Exception:
        return None


def extract_audio_clip(
    source_audio_file: str,
    start_seconds: float,
    duration_seconds: float,
    tmpdir: str | None = None,
) -> str:
    """Extract a clip from ``source_audio_file`` into a new WAV file via ffmpeg.

    The clip is written with the video stream dropped (``-vn``), a single
    audio channel (``-ac 1``) and a 16000 Hz sample rate (``-ar 16000``).

    Args:
        source_audio_file: Path of the media file to cut from.
        start_seconds: Offset of the clip start, passed to ffmpeg as ``-ss``.
        duration_seconds: Length of the clip, passed to ffmpeg as ``-t``.
        tmpdir: Directory for the output file. Defaults to the system
            temporary directory.

    Returns:
        Path of the created WAV file. The name starts with
        ``chunk_<start_ms>_<duration_ms>_`` followed by a unique suffix, so
        concurrent extractions that share a directory never overwrite each
        other's output. The caller is responsible for deleting the file.

    Raises:
        gr.Error: If the output file cannot be created or ffmpeg exits with
            a non-zero status.
    """
    tmp_root = tmpdir or tempfile.gettempdir()
    start_ms = int(start_seconds * 1000)
    duration_ms = int(duration_seconds * 1000)

    # Reserve a unique output path. A name derived only from start/duration
    # collides when two requests cut the same time range from different
    # sources into the same (shared) temporary directory.
    try:
        fd, out_path = tempfile.mkstemp(
            prefix=f"chunk_{start_ms}_{duration_ms}_",
            suffix=".wav",
            dir=tmp_root,
        )
    except OSError as exc:
        raise gr.Error(f"Could not create a temporary file for the audio chunk: {exc}") from exc
    # ffmpeg opens the path itself; only the reserved name is needed.
    os.close(fd)

    cmd = [
        "ffmpeg",
        "-y",  # overwrite the empty placeholder created above
        "-ss",
        str(start_seconds),
        "-t",
        str(duration_seconds),
        "-i",
        source_audio_file,
        "-vn",
        "-ac",
        "1",
        "-ar",
        "16000",
        out_path,
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True)
        if proc.returncode != 0:
            # Only the tail of stderr is reported to keep the message short.
            raise gr.Error(f"ffmpeg failed while extracting audio chunk: {proc.stderr[-1200:]}")
    except BaseException:
        # Do not leave an empty or partial file behind on failure.
        try:
            os.remove(out_path)
        except OSError:
            pass
        raise
    return out_path


def build_audio_chunk_plan(
    audio_file: str,
    chunk_duration_s: float,
    chunk_overlap_s: float,
) -> list[dict[str, float | int]]:
    """Split the duration of ``audio_file`` into overlapping chunk windows.

    Args:
        audio_file: Path of the audio file; its duration is obtained from
            ``get_audio_duration_seconds``.
        chunk_duration_s: Maximum length of each chunk; must be > 0.
        chunk_overlap_s: Amount by which consecutive chunks overlap; must be
            >= 0 and smaller than ``chunk_duration_s``.

    Returns:
        A list of dicts with the keys ``index``, ``start``, ``end`` and
        ``duration``, ordered by start time. The final chunk is clipped to
        the end of the audio. The list is empty when the reported duration
        is not positive.

    Raises:
        gr.Error: If the duration cannot be determined or the chunking
            parameters are invalid.
    """
    total_s = get_audio_duration_seconds(audio_file)
    if total_s is None:
        raise gr.Error("Could not determine audio duration (ffprobe failed).")
    if total_s <= 0:
        return []

    # Parameter checks run only once there is audio to split.
    if chunk_duration_s <= 0:
        raise gr.Error("chunk_duration_s must be > 0")
    if chunk_overlap_s < 0:
        raise gr.Error("chunk_overlap_s must be >= 0")
    if chunk_overlap_s >= chunk_duration_s:
        raise gr.Error("chunk_overlap_s must be smaller than chunk_duration_s")

    # Each window starts this far after the previous one.
    stride = chunk_duration_s - chunk_overlap_s
    chunks: list[dict[str, float | int]] = []
    cursor = 0.0
    while cursor < total_s:
        stop = min(total_s, cursor + chunk_duration_s)
        window = {
            # One entry is appended per iteration, so the current length
            # is the running chunk index.
            "index": len(chunks),
            "start": cursor,
            "end": stop,
            "duration": stop - cursor,
        }
        chunks.append(window)
        if stop >= total_s:
            # This window already reaches the end of the audio.
            break
        cursor += stride
    return chunks