Spaces:
Running on Zero
Running on Zero
File size: 3,591 Bytes
import json
import os
import subprocess
import tempfile
from pathlib import Path
from typing import Any
import gradio as gr
def serialize(value: Any) -> Any:
    """Reduce *value* to a structure made of JSON-friendly primitives.

    The rules below are tried in order; the first one that applies wins:

    * ``str``, ``int``, ``float``, ``bool`` and ``None`` pass through as-is.
    * ``Path`` objects are rendered with ``str()``.
    * Dicts are rebuilt with ``str()``-ed keys and serialized values.
    * Lists and tuples become lists of serialized elements.
    * Objects exposing ``item()`` and/or ``tolist()`` are converted by the
      first of those calls that succeeds; a call that raises is ignored.
    * Objects with a ``__dict__`` become a dict of serialized attributes.
    * Everything else falls back to ``str(value)``.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value

    if isinstance(value, Path):
        return str(value)

    if isinstance(value, dict):
        converted: dict[str, Any] = {}
        for key, entry in value.items():
            name = str(key)
            converted[name] = serialize(entry)
        return converted

    if isinstance(value, (list, tuple)):
        return list(map(serialize, value))

    # Best-effort conversion hooks: try ``item()`` first, then ``tolist()``.
    # A hook that raises is skipped so the later fallbacks still get a chance.
    for hook_name in ("item", "tolist"):
        if not hasattr(value, hook_name):
            continue
        try:
            return getattr(value, hook_name)()
        except Exception:
            continue

    if hasattr(value, "__dict__"):
        attributes = {}
        for attr_name, attr_value in vars(value).items():
            attributes[attr_name] = serialize(attr_value)
        return attributes

    return str(value)
def parse_model_options(raw: str | None) -> dict[str, Any]:
if not raw:
return {}
try:
parsed = json.loads(raw)
except json.JSONDecodeError as exc:
raise gr.Error(f"model_options_json must be valid JSON: {exc}") from exc
if not isinstance(parsed, dict):
raise gr.Error("model_options_json must decode to a JSON object")
return parsed
def get_audio_duration_seconds(audio_file: str) -> float | None:
cmd = [
"ffprobe",
"-v",
"error",
"-show_entries",
"format=duration",
"-of",
"default=noprint_wrappers=1:nokey=1",
audio_file,
]
proc = subprocess.run(cmd, capture_output=True, text=True)
if proc.returncode != 0:
return None
try:
return float(proc.stdout.strip())
except Exception:
return None
def extract_audio_clip(
    source_audio_file: str,
    start_seconds: float,
    duration_seconds: float,
    tmpdir: str | None = None,
) -> str:
    """Cut a time window out of a media file into a WAV file using ffmpeg.

    The clip is written with video disabled (``-vn``), one audio channel
    (``-ac 1``) and a 16000 Hz sample rate (``-ar 16000``).

    Args:
        source_audio_file: Path of the input media file.
        start_seconds: Offset of the window start, passed to ``-ss``.
        duration_seconds: Length of the window, passed to ``-t``.
        tmpdir: Directory for the output file; defaults to the system
            temporary directory.

    Returns:
        Path of the written WAV file.

    Raises:
        gr.Error: If ffmpeg cannot be started or exits with a non-zero status.

    Note:
        The output name depends only on the start and duration (in whole
        milliseconds) and ffmpeg runs with ``-y``, so calls that share an
        output directory and window overwrite each other's file.
    """
    tmp_root = tmpdir or tempfile.gettempdir()
    out_name = f"chunk_{int(start_seconds*1000)}_{int(duration_seconds*1000)}.wav"
    out_path = os.path.join(tmp_root, out_name)
    cmd = [
        "ffmpeg",
        "-y",
        "-ss",
        str(start_seconds),
        "-t",
        str(duration_seconds),
        "-i",
        source_audio_file,
        "-vn",
        "-ac",
        "1",
        "-ar",
        "16000",
        out_path,
    ]
    try:
        # errors="replace" keeps undecodable bytes in ffmpeg's stderr from
        # raising before the real failure can be reported.
        proc = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
    except OSError as exc:
        # ffmpeg missing / not executable: surface it the same way as any
        # other extraction failure.
        raise gr.Error(f"ffmpeg could not be started while extracting audio chunk: {exc}") from exc

    if proc.returncode != 0:
        # Only the tail of stderr is shown; that is where ffmpeg puts the error.
        raise gr.Error(f"ffmpeg failed while extracting audio chunk: {proc.stderr[-1200:]}")
    return out_path
def build_audio_chunk_plan(
    audio_file: str,
    chunk_duration_s: float,
    chunk_overlap_s: float,
) -> list[dict[str, float | int]]:
    """Split an audio file's timeline into (possibly overlapping) windows.

    Args:
        audio_file: Path of the audio file whose duration is probed.
        chunk_duration_s: Maximum length of each window, in seconds.
        chunk_overlap_s: Seconds shared between consecutive windows.

    Returns:
        A list of dicts with keys ``index``, ``start``, ``end`` and
        ``duration``. Consecutive windows start
        ``chunk_duration_s - chunk_overlap_s`` apart and the last one is
        clipped to the end of the audio. Empty when the probed duration is
        not positive.

    Raises:
        gr.Error: If the duration cannot be determined, or if the chunking
            parameters are invalid (non-positive duration, negative overlap,
            or overlap not smaller than the chunk duration).
    """
    total_s = get_audio_duration_seconds(audio_file)
    if total_s is None:
        raise gr.Error("Could not determine audio duration (ffprobe failed).")
    if total_s <= 0:
        return []

    if chunk_duration_s <= 0:
        raise gr.Error("chunk_duration_s must be > 0")
    if chunk_overlap_s < 0:
        raise gr.Error("chunk_overlap_s must be >= 0")
    if chunk_overlap_s >= chunk_duration_s:
        raise gr.Error("chunk_overlap_s must be smaller than chunk_duration_s")

    stride = chunk_duration_s - chunk_overlap_s
    chunks = []
    window_start = 0.0
    while window_start < total_s:
        window_end = min(total_s, window_start + chunk_duration_s)
        # The running list length doubles as the zero-based chunk index.
        chunks.append(
            {
                "index": len(chunks),
                "start": window_start,
                "end": window_end,
                "duration": window_end - window_start,
            }
        )
        if window_end >= total_s:
            # This window already reaches the end of the audio.
            break
        window_start += stride
    return chunks
|