Spaces:
Running on Zero
Commit · 7def15a
1
Parent(s): 667e520
Report measured ZeroGPU inference time
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import csv
|
|
| 4 |
import io
|
| 5 |
import subprocess
|
| 6 |
import tempfile
|
|
|
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Any
|
| 9 |
|
|
@@ -102,9 +103,10 @@ def _run_diarization(
|
|
| 102 |
audio_path: str,
|
| 103 |
hf_token: str,
|
| 104 |
prefer_exclusive: bool,
|
| 105 |
-
) -> tuple[list[dict[str, Any]], str, str]:
|
| 106 |
pipeline = get_pipeline(hf_token)
|
| 107 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
| 108 |
|
| 109 |
pipeline.to(device)
|
| 110 |
try:
|
|
@@ -116,6 +118,8 @@ def _run_diarization(
|
|
| 116 |
pipeline.to(torch.device("cpu"))
|
| 117 |
torch.cuda.empty_cache()
|
| 118 |
|
|
|
|
|
|
|
| 119 |
annotation = output.speaker_diarization
|
| 120 |
annotation_label = "speaker_diarization"
|
| 121 |
|
|
@@ -140,7 +144,7 @@ def _run_diarization(
|
|
| 140 |
rttm_buffer = io.StringIO()
|
| 141 |
annotation.write_rttm(rttm_buffer)
|
| 142 |
|
| 143 |
-
return segments, rttm_buffer.getvalue(), annotation_label
|
| 144 |
|
| 145 |
|
| 146 |
def _write_artifacts(segments: list[dict[str, Any]], rttm_text: str) -> list[str]:
|
|
@@ -183,7 +187,7 @@ def diarize(
|
|
| 183 |
# Load on CPU first so the ZeroGPU decorator only wraps actual inference.
|
| 184 |
get_pipeline(resolved_token)
|
| 185 |
|
| 186 |
-
segments, rttm_text, annotation_label = _run_diarization(
|
| 187 |
audio_path=normalized_audio_path,
|
| 188 |
hf_token=resolved_token,
|
| 189 |
prefer_exclusive=prefer_exclusive,
|
|
@@ -194,7 +198,8 @@ def diarize(
|
|
| 194 |
"### No active speaker segments were detected\n"
|
| 195 |
f"Inference completed with `{annotation_label}` output, but it contained no segments."
|
| 196 |
)
|
| 197 |
-
|
|
|
|
| 198 |
|
| 199 |
unique_speakers = sorted({segment["speaker"] for segment in segments})
|
| 200 |
total_speech = sum(segment["duration"] for segment in segments)
|
|
@@ -204,7 +209,8 @@ def diarize(
|
|
| 204 |
f"- Output used: `{annotation_label}`\n"
|
| 205 |
f"- Segments: **{len(segments)}**\n"
|
| 206 |
f"- Speakers detected: **{len(unique_speakers)}** ({', '.join(unique_speakers)})\n"
|
| 207 |
-
f"- Total labelled speech: **{total_speech:.2f}s**"
|
|
|
|
| 208 |
)
|
| 209 |
|
| 210 |
table = [
|
|
@@ -223,7 +229,7 @@ def diarize(
|
|
| 223 |
)
|
| 224 |
|
| 225 |
artifacts = _write_artifacts(segments, rttm_text)
|
| 226 |
-
return summary, table, turns_text, artifacts
|
| 227 |
|
| 228 |
|
| 229 |
def build_demo() -> gr.Blocks:
|
|
@@ -262,6 +268,7 @@ def build_demo() -> gr.Blocks:
|
|
| 262 |
|
| 263 |
with gr.Column(scale=1):
|
| 264 |
summary_output = gr.Markdown()
|
|
|
|
| 265 |
segments_output = gr.Dataframe(
|
| 266 |
headers=["Speaker", "Start", "End", "Duration (s)"],
|
| 267 |
datatype=["str", "str", "str", "number"],
|
|
@@ -282,7 +289,7 @@ def build_demo() -> gr.Blocks:
|
|
| 282 |
token_input,
|
| 283 |
prefer_exclusive,
|
| 284 |
],
|
| 285 |
-
outputs=[summary_output, segments_output, turns_output, files_output],
|
| 286 |
)
|
| 287 |
|
| 288 |
gr.Markdown(
|
|
|
|
| 4 |
import io
|
| 5 |
import subprocess
|
| 6 |
import tempfile
|
| 7 |
+
import time
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import Any
|
| 10 |
|
|
|
|
| 103 |
audio_path: str,
|
| 104 |
hf_token: str,
|
| 105 |
prefer_exclusive: bool,
|
| 106 |
+
) -> tuple[list[dict[str, Any]], str, str, float]:
|
| 107 |
pipeline = get_pipeline(hf_token)
|
| 108 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 109 |
+
started_at = time.perf_counter()
|
| 110 |
|
| 111 |
pipeline.to(device)
|
| 112 |
try:
|
|
|
|
| 118 |
pipeline.to(torch.device("cpu"))
|
| 119 |
torch.cuda.empty_cache()
|
| 120 |
|
| 121 |
+
zerogpu_seconds = time.perf_counter() - started_at
|
| 122 |
+
|
| 123 |
annotation = output.speaker_diarization
|
| 124 |
annotation_label = "speaker_diarization"
|
| 125 |
|
|
|
|
| 144 |
rttm_buffer = io.StringIO()
|
| 145 |
annotation.write_rttm(rttm_buffer)
|
| 146 |
|
| 147 |
+
return segments, rttm_buffer.getvalue(), annotation_label, zerogpu_seconds
|
| 148 |
|
| 149 |
|
| 150 |
def _write_artifacts(segments: list[dict[str, Any]], rttm_text: str) -> list[str]:
|
|
|
|
| 187 |
# Load on CPU first so the ZeroGPU decorator only wraps actual inference.
|
| 188 |
get_pipeline(resolved_token)
|
| 189 |
|
| 190 |
+
segments, rttm_text, annotation_label, zerogpu_seconds = _run_diarization(
|
| 191 |
audio_path=normalized_audio_path,
|
| 192 |
hf_token=resolved_token,
|
| 193 |
prefer_exclusive=prefer_exclusive,
|
|
|
|
| 198 |
"### No active speaker segments were detected\n"
|
| 199 |
f"Inference completed with `{annotation_label}` output, but it contained no segments."
|
| 200 |
)
|
| 201 |
+
summary += f"\n- ZeroGPU time used: **{zerogpu_seconds:.2f}s**"
|
| 202 |
+
return summary, round(zerogpu_seconds, 3), [], "", _write_artifacts(segments, rttm_text)
|
| 203 |
|
| 204 |
unique_speakers = sorted({segment["speaker"] for segment in segments})
|
| 205 |
total_speech = sum(segment["duration"] for segment in segments)
|
|
|
|
| 209 |
f"- Output used: `{annotation_label}`\n"
|
| 210 |
f"- Segments: **{len(segments)}**\n"
|
| 211 |
f"- Speakers detected: **{len(unique_speakers)}** ({', '.join(unique_speakers)})\n"
|
| 212 |
+
f"- Total labelled speech: **{total_speech:.2f}s**\n"
|
| 213 |
+
f"- ZeroGPU time used: **{zerogpu_seconds:.2f}s**"
|
| 214 |
)
|
| 215 |
|
| 216 |
table = [
|
|
|
|
| 229 |
)
|
| 230 |
|
| 231 |
artifacts = _write_artifacts(segments, rttm_text)
|
| 232 |
+
return summary, round(zerogpu_seconds, 3), table, turns_text, artifacts
|
| 233 |
|
| 234 |
|
| 235 |
def build_demo() -> gr.Blocks:
|
|
|
|
| 268 |
|
| 269 |
with gr.Column(scale=1):
|
| 270 |
summary_output = gr.Markdown()
|
| 271 |
+
zerogpu_seconds_output = gr.Number(label="ZeroGPU seconds used", precision=3)
|
| 272 |
segments_output = gr.Dataframe(
|
| 273 |
headers=["Speaker", "Start", "End", "Duration (s)"],
|
| 274 |
datatype=["str", "str", "str", "number"],
|
|
|
|
| 289 |
token_input,
|
| 290 |
prefer_exclusive,
|
| 291 |
],
|
| 292 |
+
outputs=[summary_output, zerogpu_seconds_output, segments_output, turns_output, files_output],
|
| 293 |
)
|
| 294 |
|
| 295 |
gr.Markdown(
|