Spaces:
Running on Zero
Running on Zero
Update API doc
Browse files- docs/client_api.md +13 -6
- src/api/session_api.py +24 -1
docs/client_api.md
CHANGED
|
@@ -9,12 +9,18 @@
|
|
| 9 |
|
| 10 |
---
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
## Quick Start
|
| 13 |
|
| 14 |
```python
|
| 15 |
from gradio_client import Client
|
| 16 |
|
| 17 |
-
client = Client("https://
|
| 18 |
|
| 19 |
# Full pipeline
|
| 20 |
result = client.predict(
|
|
@@ -87,7 +93,7 @@ Processes a recitation audio file: detects speech segments, recognizes text, and
|
|
| 87 |
| `min_silence_ms` | int | 200 | Minimum silence gap to split segments |
|
| 88 |
| `min_speech_ms` | int | 1000 | Minimum speech duration to keep a segment |
|
| 89 |
| `pad_ms` | int | 100 | Padding added to each side of a segment |
|
| 90 |
-
| `model_name` | str | `"Base"` | `"Base"` (faster) or `"Large"` (more accurate) |
|
| 91 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 92 |
|
| 93 |
If the GPU is temporarily unavailable, processing continues on CPU (slower). When this happens, a `"warning"` field is included in the response (see [GPU Fallback Warning](#gpu-fallback-warning)).
|
|
@@ -146,7 +152,7 @@ Re-splits the audio into segments using different silence/speech settings, then
|
|
| 146 |
| `min_silence_ms` | int | 200 | New minimum silence gap |
|
| 147 |
| `min_speech_ms` | int | 1000 | New minimum speech duration |
|
| 148 |
| `pad_ms` | int | 100 | New padding |
|
| 149 |
-
| `model_name` | str | `"Base"` | `"Base"` or `"Large"` |
|
| 150 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 151 |
|
| 152 |
**Response:** Same shape as `/process_audio_session`. Session boundaries are updated.
|
|
@@ -160,7 +166,7 @@ Re-recognizes text using a different model on the same segments, then re-aligns.
|
|
| 160 |
| Parameter | Type | Default | Description |
|
| 161 |
|---|---|---|---|
|
| 162 |
| `audio_id` | str | required | Session ID from a previous call |
|
| 163 |
-
| `model_name` | str | `"Base"` | `"Base"` or `"Large"` |
|
| 164 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 165 |
|
| 166 |
**Response:** Same shape as `/process_audio_session`. Session model and results are updated.
|
|
@@ -177,7 +183,7 @@ Aligns audio using custom time boundaries you provide. Useful for manually adjus
|
|
| 177 |
|---|---|---|---|
|
| 178 |
| `audio_id` | str | required | Session ID from a previous call |
|
| 179 |
| `timestamps` | list | required | Array of `{"start": float, "end": float}` in seconds |
|
| 180 |
-
| `model_name` | str | `"Base"` | `"Base"` or `"Large"` |
|
| 181 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 182 |
|
| 183 |
**Example request body:**
|
|
@@ -313,7 +319,7 @@ Estimate processing time before starting a request.
|
|
| 313 |
| `endpoint` | str | required | Target endpoint name (e.g. `"process_audio_session"`) |
|
| 314 |
| `audio_duration_s` | float | `None` | Audio length in seconds. Required if no `audio_id` |
|
| 315 |
| `audio_id` | str | `None` | Session ID — looks up audio duration from the session |
|
| 316 |
-
| `model_name` | str | `"Base"` | `"Base"` or `"Large"` |
|
| 317 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 318 |
|
| 319 |
**Example — before first processing call:**
|
|
@@ -425,6 +431,7 @@ All errors follow the same shape: `{"error": "...", "segments": []}`. Endpoints
|
|
| 425 |
| Session not found or expired | `"Session not found or expired"` | No |
|
| 426 |
| No speech detected (process) | `"No speech detected in audio"` | No (no session created) |
|
| 427 |
| No segments after resegment | `"No segments with these settings"` | Yes |
|
|
|
|
| 428 |
| Retranscribe with same model | `"Model and boundaries unchanged. Change model_name or call /resegment first."` | Yes |
|
| 429 |
| Retranscription failed | `"Retranscription failed"` | Yes |
|
| 430 |
| Realignment failed | `"Alignment failed"` | Yes |
|
|
|
|
| 9 |
|
| 10 |
---
|
| 11 |
|
| 12 |
+
> **GPU Usage & Access**
|
| 13 |
+
>
|
| 14 |
+
> - **Free Tier:** Every user receives a **free daily GPU quota**. Once your daily GPU quota is exhausted, you can continue using unlimited CPU processing for all endpoints.
|
| 15 |
+
> - **Unlimited GPU Access:** If you need unlimited API access on GPU (e.g., for high-volume or production use), please get in touch to arrange a payment plan and higher limits.
|
| 16 |
+
> - **Note:** CPU processing is always unlimited and available, but is much slower. When GPU quota is exceeded, requests will be automatically routed to CPU and a warning will appear in the response.
|
| 17 |
+
|
| 18 |
## Quick Start
|
| 19 |
|
| 20 |
```python
|
| 21 |
from gradio_client import Client
|
| 22 |
|
| 23 |
+
client = Client("https://hetchyy-quran-multi-aligner.hf.space")
|
| 24 |
|
| 25 |
# Full pipeline
|
| 26 |
result = client.predict(
|
|
|
|
| 93 |
| `min_silence_ms` | int | 200 | Minimum silence gap to split segments |
|
| 94 |
| `min_speech_ms` | int | 1000 | Minimum speech duration to keep a segment |
|
| 95 |
| `pad_ms` | int | 100 | Padding added to each side of a segment |
|
| 96 |
+
| `model_name` | str | `"Base"` | `"Base"` (faster) or `"Large"` (more accurate). **Only these two values are accepted** — any other value will cause an error |
|
| 97 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 98 |
|
| 99 |
If the GPU is temporarily unavailable, processing continues on CPU (slower). When this happens, a `"warning"` field is included in the response (see [GPU Fallback Warning](#gpu-fallback-warning)).
|
|
|
|
| 152 |
| `min_silence_ms` | int | 200 | New minimum silence gap |
|
| 153 |
| `min_speech_ms` | int | 1000 | New minimum speech duration |
|
| 154 |
| `pad_ms` | int | 100 | New padding |
|
| 155 |
+
| `model_name` | str | `"Base"` | `"Base"` or `"Large"` only |
|
| 156 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 157 |
|
| 158 |
**Response:** Same shape as `/process_audio_session`. Session boundaries are updated.
|
|
|
|
| 166 |
| Parameter | Type | Default | Description |
|
| 167 |
|---|---|---|---|
|
| 168 |
| `audio_id` | str | required | Session ID from a previous call |
|
| 169 |
+
| `model_name` | str | `"Base"` | `"Base"` or `"Large"` only |
|
| 170 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 171 |
|
| 172 |
**Response:** Same shape as `/process_audio_session`. Session model and results are updated.
|
|
|
|
| 183 |
|---|---|---|---|
|
| 184 |
| `audio_id` | str | required | Session ID from a previous call |
|
| 185 |
| `timestamps` | list | required | Array of `{"start": float, "end": float}` in seconds |
|
| 186 |
+
| `model_name` | str | `"Base"` | `"Base"` or `"Large"` only |
|
| 187 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 188 |
|
| 189 |
**Example request body:**
|
|
|
|
| 319 |
| `endpoint` | str | required | Target endpoint name (e.g. `"process_audio_session"`) |
|
| 320 |
| `audio_duration_s` | float | `None` | Audio length in seconds. Required if no `audio_id` |
|
| 321 |
| `audio_id` | str | `None` | Session ID — looks up audio duration from the session |
|
| 322 |
+
| `model_name` | str | `"Base"` | `"Base"` or `"Large"` only |
|
| 323 |
| `device` | str | `"GPU"` | `"GPU"` or `"CPU"` |
|
| 324 |
|
| 325 |
**Example — before first processing call:**
|
|
|
|
| 431 |
| Session not found or expired | `"Session not found or expired"` | No |
|
| 432 |
| No speech detected (process) | `"No speech detected in audio"` | No (no session created) |
|
| 433 |
| No segments after resegment | `"No segments with these settings"` | Yes |
|
| 434 |
+
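| Invalid model name | `"Invalid model_name '...'. Must be one of: Base, Large"` | Depends on endpoint: No for `/process_audio_session` (no session created); Yes for resegment, retranscribe, and realign-from-timestamps calls |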
|
| 435 |
| Retranscribe with same model | `"Model and boundaries unchanged. Change model_name or call /resegment first."` | Yes |
|
| 436 |
| Retranscription failed | `"Retranscription failed"` | Yes |
|
| 437 |
| Realignment failed | `"Alignment failed"` | Yes |
|
src/api/session_api.py
CHANGED
|
@@ -18,7 +18,7 @@ import uuid
|
|
| 18 |
import gradio as gr
|
| 19 |
import numpy as np
|
| 20 |
|
| 21 |
-
from config import SESSION_DIR, SESSION_EXPIRY_SECONDS
|
| 22 |
from src.core.zero_gpu import QuotaExhaustedError
|
| 23 |
|
| 24 |
# ---------------------------------------------------------------------------
|
|
@@ -29,6 +29,14 @@ _last_cleanup_time = 0.0
|
|
| 29 |
_CLEANUP_INTERVAL = 1800 # sweep at most every 30 min
|
| 30 |
|
| 31 |
_VALID_ID = re.compile(r"^[0-9a-f]{32}$")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
def _session_dir(audio_id: str):
|
|
@@ -332,6 +340,9 @@ def process_audio_session(audio_data, min_silence_ms, min_speech_ms, pad_ms,
|
|
| 332 |
model_name="Base", device="GPU",
|
| 333 |
request: gr.Request = None):
|
| 334 |
"""Full pipeline: preprocess -> VAD -> ASR -> alignment. Creates session."""
|
|
|
|
|
|
|
|
|
|
| 335 |
from src.pipeline import process_audio
|
| 336 |
|
| 337 |
quota_warning = None
|
|
@@ -368,6 +379,10 @@ def resegment(audio_id, min_silence_ms, min_speech_ms, pad_ms,
|
|
| 368 |
model_name="Base", device="GPU",
|
| 369 |
request: gr.Request = None):
|
| 370 |
"""Re-clean VAD boundaries with new params and re-run ASR + alignment."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
session = load_session(audio_id)
|
| 372 |
if session is None:
|
| 373 |
return _SESSION_ERROR
|
|
@@ -403,6 +418,10 @@ def resegment(audio_id, min_silence_ms, min_speech_ms, pad_ms,
|
|
| 403 |
def retranscribe(audio_id, model_name="Base", device="GPU",
|
| 404 |
request: gr.Request = None):
|
| 405 |
"""Re-run ASR with a different model on current segment boundaries."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
session = load_session(audio_id)
|
| 407 |
if session is None:
|
| 408 |
return _SESSION_ERROR
|
|
@@ -446,6 +465,10 @@ def retranscribe(audio_id, model_name="Base", device="GPU",
|
|
| 446 |
def realign_from_timestamps(audio_id, timestamps, model_name="Base", device="GPU",
|
| 447 |
request: gr.Request = None):
|
| 448 |
"""Run ASR + alignment on caller-provided timestamp intervals."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
session = load_session(audio_id)
|
| 450 |
if session is None:
|
| 451 |
return _SESSION_ERROR
|
|
|
|
| 18 |
import gradio as gr
|
| 19 |
import numpy as np
|
| 20 |
|
| 21 |
+
from config import SESSION_DIR, SESSION_EXPIRY_SECONDS, PHONEME_ASR_MODELS
|
| 22 |
from src.core.zero_gpu import QuotaExhaustedError
|
| 23 |
|
| 24 |
# ---------------------------------------------------------------------------
|
|
|
|
| 29 |
_CLEANUP_INTERVAL = 1800 # sweep at most every 30 min
|
| 30 |
|
| 31 |
_VALID_ID = re.compile(r"^[0-9a-f]{32}$")
|
| 32 |
+
_VALID_MODELS = set(PHONEME_ASR_MODELS.keys())
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _validate_model_name(model_name):
|
| 36 |
+
"""Return an error dict if model_name is invalid, else None."""
|
| 37 |
+
if model_name not in _VALID_MODELS:
|
| 38 |
+
valid = ", ".join(sorted(_VALID_MODELS))
|
| 39 |
+
return {"error": f"Invalid model_name '{model_name}'. Must be one of: {valid}", "segments": []}
|
| 40 |
|
| 41 |
|
| 42 |
def _session_dir(audio_id: str):
|
|
|
|
| 340 |
model_name="Base", device="GPU",
|
| 341 |
request: gr.Request = None):
|
| 342 |
"""Full pipeline: preprocess -> VAD -> ASR -> alignment. Creates session."""
|
| 343 |
+
err = _validate_model_name(model_name)
|
| 344 |
+
if err:
|
| 345 |
+
return err
|
| 346 |
from src.pipeline import process_audio
|
| 347 |
|
| 348 |
quota_warning = None
|
|
|
|
| 379 |
model_name="Base", device="GPU",
|
| 380 |
request: gr.Request = None):
|
| 381 |
"""Re-clean VAD boundaries with new params and re-run ASR + alignment."""
|
| 382 |
+
err = _validate_model_name(model_name)
|
| 383 |
+
if err:
|
| 384 |
+
err["audio_id"] = audio_id
|
| 385 |
+
return err
|
| 386 |
session = load_session(audio_id)
|
| 387 |
if session is None:
|
| 388 |
return _SESSION_ERROR
|
|
|
|
| 418 |
def retranscribe(audio_id, model_name="Base", device="GPU",
|
| 419 |
request: gr.Request = None):
|
| 420 |
"""Re-run ASR with a different model on current segment boundaries."""
|
| 421 |
+
err = _validate_model_name(model_name)
|
| 422 |
+
if err:
|
| 423 |
+
err["audio_id"] = audio_id
|
| 424 |
+
return err
|
| 425 |
session = load_session(audio_id)
|
| 426 |
if session is None:
|
| 427 |
return _SESSION_ERROR
|
|
|
|
| 465 |
def realign_from_timestamps(audio_id, timestamps, model_name="Base", device="GPU",
|
| 466 |
request: gr.Request = None):
|
| 467 |
"""Run ASR + alignment on caller-provided timestamp intervals."""
|
| 468 |
+
err = _validate_model_name(model_name)
|
| 469 |
+
if err:
|
| 470 |
+
err["audio_id"] = audio_id
|
| 471 |
+
return err
|
| 472 |
session = load_session(audio_id)
|
| 473 |
if session is None:
|
| 474 |
return _SESSION_ERROR
|