Spaces:
Running
Running
Commit ·
c7d2aa0
1
Parent(s): 035bf47
Add Schema Documentation To API
Browse files
README.md
CHANGED
|
@@ -25,12 +25,35 @@ Model setup is global/outside `@spaces.GPU` so setup time is not billed to ZeroG
|
|
| 25 |
## API routes
|
| 26 |
- `/run_complete_pipeline`
|
| 27 |
- `/get_debug_output`
|
|
|
|
| 28 |
|
| 29 |
## `/run_complete_pipeline` inputs
|
| 30 |
- `audio_file` (file path from Gradio client upload)
|
| 31 |
- `huggingface_token`
|
| 32 |
|
| 33 |
-
Returns: merged transcript JSON only
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
## `/get_debug_output` inputs
|
| 36 |
- `run_id` (optional)
|
|
@@ -41,6 +64,14 @@ Returns: raw/debug payload for the latest run (or specific run if provided), inc
|
|
| 41 |
- merged transcript payload
|
| 42 |
- aggregated timing
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
## IPython example
|
| 45 |
```python
|
| 46 |
from gradio_client import Client, handle_file
|
|
|
|
| 25 |
## API routes
|
| 26 |
- `/run_complete_pipeline`
|
| 27 |
- `/get_debug_output`
|
| 28 |
+
- `/get_run_complete_pipeline_schema`
|
| 29 |
|
| 30 |
## `/run_complete_pipeline` inputs
|
| 31 |
- `audio_file` (file path from Gradio client upload)
|
| 32 |
- `huggingface_token`
|
| 33 |
|
| 34 |
+
Returns: merged transcript JSON only, with this structure:
|
| 35 |
+
|
| 36 |
+
```json
|
| 37 |
+
{
|
| 38 |
+
"summary": {
|
| 39 |
+
"diarization_key_used": "exclusive_speaker_diarization",
|
| 40 |
+
"parakeet_word_count": 1234,
|
| 41 |
+
"pyannote_segment_count": 42,
|
| 42 |
+
"turn_count": 39,
|
| 43 |
+
"assigned_word_count": 1219,
|
| 44 |
+
"unassigned_word_count": 15
|
| 45 |
+
},
|
| 46 |
+
"turns": [
|
| 47 |
+
{
|
| 48 |
+
"speaker": "SPEAKER_00",
|
| 49 |
+
"start": 0.0,
|
| 50 |
+
"end": 12.34,
|
| 51 |
+
"text": "Good morning and welcome to the earnings call."
|
| 52 |
+
}
|
| 53 |
+
],
|
| 54 |
+
"transcript_text": "[0.00 - 12.34] SPEAKER_00: Good morning and welcome to the earnings call."
|
| 55 |
+
}
|
| 56 |
+
```
|
| 57 |
|
| 58 |
## `/get_debug_output` inputs
|
| 59 |
- `run_id` (optional)
|
|
|
|
| 64 |
- merged transcript payload
|
| 65 |
- aggregated timing
|
| 66 |
|
| 67 |
+
## `/get_run_complete_pipeline_schema` inputs
|
| 68 |
+
- none
|
| 69 |
+
|
| 70 |
+
Returns a machine-readable contract for `/run_complete_pipeline`:
|
| 71 |
+
- `input_schema`
|
| 72 |
+
- `output_schema`
|
| 73 |
+
- `output_example`
|
| 74 |
+
|
| 75 |
## IPython example
|
| 76 |
```python
|
| 77 |
from gradio_client import Client, handle_file
|
app.py
CHANGED
|
@@ -26,6 +26,82 @@ _DEBUG_RUNS: "OrderedDict[str, dict[str, Any]]" = OrderedDict()
|
|
| 26 |
_MAX_DEBUG_RUNS = 10
|
| 27 |
_LAST_DEBUG_RUN_ID: str | None = None
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def _preload_model(model_label: str, preload_fn) -> None:
|
| 31 |
try:
|
|
@@ -193,6 +269,19 @@ def get_debug_output(run_id: str | None):
|
|
| 193 |
return {"run_id": _LAST_DEBUG_RUN_ID, "debug": _DEBUG_RUNS[_LAST_DEBUG_RUN_ID]}
|
| 194 |
|
| 195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
|
| 197 |
gr.Markdown(
|
| 198 |
"# End-to-end transcript pipeline\n"
|
|
@@ -216,6 +305,23 @@ with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
|
|
| 216 |
inputs=[audio_file, huggingface_token],
|
| 217 |
outputs=output,
|
| 218 |
api_name="run_complete_pipeline",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
)
|
| 220 |
|
| 221 |
with gr.Row():
|
|
@@ -228,6 +334,21 @@ with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
|
|
| 228 |
inputs=[debug_run_id],
|
| 229 |
outputs=debug_output,
|
| 230 |
api_name="get_debug_output",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
)
|
| 232 |
|
| 233 |
|
|
|
|
| 26 |
_MAX_DEBUG_RUNS = 10
|
| 27 |
_LAST_DEBUG_RUN_ID: str | None = None
|
| 28 |
|
| 29 |
+
# JSON-Schema-style description of the merged transcript payload.
# The nested objects are declared first so each one can be read on its own;
# in both of them every declared field is also required, so the "required"
# lists are derived from the field mappings (same names, same order).

# Field schemas of the "summary" object.
_SUMMARY_FIELDS: dict[str, Any] = {
    "diarization_key_used": {"type": "string", "example": "exclusive_speaker_diarization"},
    "parakeet_word_count": {"type": "integer"},
    "pyannote_segment_count": {"type": "integer"},
    "turn_count": {"type": "integer"},
    "assigned_word_count": {"type": "integer"},
    "unassigned_word_count": {"type": "integer"},
}

# Field schemas of a single entry of the "turns" array.
_TURN_FIELDS: dict[str, Any] = {
    "speaker": {"type": "string", "example": "SPEAKER_02"},
    "start": {"type": "number", "example": 40.72},
    "end": {"type": "number", "example": 514.0},
    "text": {"type": "string"},
}

RUN_COMPLETE_PIPELINE_OUTPUT_SCHEMA: dict[str, Any] = {
    "type": "object",
    "description": "Merged transcript output from Parakeet (word timestamps) and Pyannote diarization.",
    "properties": {
        "summary": {
            "type": "object",
            "properties": _SUMMARY_FIELDS,
            "required": list(_SUMMARY_FIELDS),
        },
        "turns": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": _TURN_FIELDS,
                "required": list(_TURN_FIELDS),
            },
        },
        "transcript_text": {"type": "string"},
    },
    "required": ["summary", "turns", "transcript_text"],
}
|
| 69 |
+
|
| 70 |
+
# Sample payload with the same top-level keys as the merged transcript output
# ("summary", "turns", "transcript_text").
# The values are illustrative only: the summary counts are not computed from
# the two sample turns, and "transcript_text" is abbreviated with "...".

_EXAMPLE_SUMMARY: dict[str, Any] = {
    "diarization_key_used": "exclusive_speaker_diarization",
    "parakeet_word_count": 1234,
    "pyannote_segment_count": 42,
    "turn_count": 39,
    "assigned_word_count": 1219,
    "unassigned_word_count": 15,
}

_EXAMPLE_TURNS: list[dict[str, Any]] = [
    {
        "speaker": "SPEAKER_00",
        "start": 0.0,
        "end": 12.34,
        "text": "Good morning and welcome to the earnings call.",
    },
    {
        "speaker": "SPEAKER_01",
        "start": 12.34,
        "end": 19.02,
        "text": "Thank you. Let us begin with quarterly highlights.",
    },
]

RUN_COMPLETE_PIPELINE_OUTPUT_EXAMPLE: dict[str, Any] = {
    "summary": _EXAMPLE_SUMMARY,
    "turns": _EXAMPLE_TURNS,
    "transcript_text": "[0.00 - 12.34] SPEAKER_00: Good morning ...",
}
|
| 95 |
+
|
| 96 |
+
# JSON-Schema-style description of the two pipeline inputs; both are required,
# so the "required" list is derived from the field mapping (same order).
#
# NOTE(review): "file" is not one of the JSON Schema primitive types
# (string/number/integer/boolean/object/array/null). Presumably it is meant as
# a human-readable marker for an upload -- confirm no consumer feeds this
# mapping to a strict JSON Schema validator.
_INPUT_FIELDS: dict[str, Any] = {
    "audio_file": {"type": "file", "description": "Audio file upload"},
    "huggingface_token": {"type": "string", "description": "HF access token for pyannote model"},
}

RUN_COMPLETE_PIPELINE_INPUT_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": _INPUT_FIELDS,
    "required": list(_INPUT_FIELDS),
}
|
| 104 |
+
|
| 105 |
|
| 106 |
def _preload_model(model_label: str, preload_fn) -> None:
|
| 107 |
try:
|
|
|
|
| 269 |
return {"run_id": _LAST_DEBUG_RUN_ID, "debug": _DEBUG_RUNS[_LAST_DEBUG_RUN_ID]}
|
| 270 |
|
| 271 |
|
| 272 |
+
def get_run_complete_pipeline_schema() -> dict[str, Any]:
    """Return the schema contract for the ``/run_complete_pipeline`` route.

    Returns:
        A mapping with, in this order: ``api_name``, ``input_schema``,
        ``output_schema``, ``output_example`` and ``notes``. The schema and
        example entries are the module-level constants themselves, not
        copies, so callers should treat the result as read-only.
    """
    usage_notes = [
        "Use /get_debug_output to fetch raw model payloads and timing.",
        "The production route returns only merged transcript JSON.",
    ]

    # Keys are inserted in the order they should appear in the response.
    contract: dict[str, Any] = {"api_name": "/run_complete_pipeline"}
    contract["input_schema"] = RUN_COMPLETE_PIPELINE_INPUT_SCHEMA
    contract["output_schema"] = RUN_COMPLETE_PIPELINE_OUTPUT_SCHEMA
    contract["output_example"] = RUN_COMPLETE_PIPELINE_OUTPUT_EXAMPLE
    contract["notes"] = usage_notes
    return contract
|
| 283 |
+
|
| 284 |
+
|
| 285 |
with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
|
| 286 |
gr.Markdown(
|
| 287 |
"# End-to-end transcript pipeline\n"
|
|
|
|
| 305 |
inputs=[audio_file, huggingface_token],
|
| 306 |
outputs=output,
|
| 307 |
api_name="run_complete_pipeline",
|
| 308 |
+
api_description=(
|
| 309 |
+
"Run Parakeet + Pyannote and return merged transcript JSON.\n"
|
| 310 |
+
"Response shape:\n"
|
| 311 |
+
"{\n"
|
| 312 |
+
' "summary": {\n'
|
| 313 |
+
' "diarization_key_used": str,\n'
|
| 314 |
+
' "parakeet_word_count": int,\n'
|
| 315 |
+
' "pyannote_segment_count": int,\n'
|
| 316 |
+
' "turn_count": int,\n'
|
| 317 |
+
' "assigned_word_count": int,\n'
|
| 318 |
+
' "unassigned_word_count": int\n'
|
| 319 |
+
" },\n"
|
| 320 |
+
' "turns": [{"speaker": str, "start": float, "end": float, "text": str}],\n'
|
| 321 |
+
' "transcript_text": str\n'
|
| 322 |
+
"}\n"
|
| 323 |
+
"For full machine-readable schema + example, call /get_run_complete_pipeline_schema."
|
| 324 |
+
),
|
| 325 |
)
|
| 326 |
|
| 327 |
with gr.Row():
|
|
|
|
| 334 |
inputs=[debug_run_id],
|
| 335 |
outputs=debug_output,
|
| 336 |
api_name="get_debug_output",
|
| 337 |
+
api_description=(
|
| 338 |
+
"Return latest (or selected) debug payload including raw Parakeet/Pyannote outputs "
|
| 339 |
+
"and aggregated pipeline timing."
|
| 340 |
+
),
|
| 341 |
+
)
|
| 342 |
+
|
| 343 |
+
with gr.Row(visible=False):
|
| 344 |
+
schema_btn = gr.Button("get_run_complete_pipeline_schema")
|
| 345 |
+
schema_output = gr.JSON(label="run_complete_pipeline schema", visible=False)
|
| 346 |
+
schema_btn.click(
|
| 347 |
+
fn=get_run_complete_pipeline_schema,
|
| 348 |
+
inputs=None,
|
| 349 |
+
outputs=schema_output,
|
| 350 |
+
api_name="get_run_complete_pipeline_schema",
|
| 351 |
+
api_description="Return input/output schema contract for /run_complete_pipeline.",
|
| 352 |
)
|
| 353 |
|
| 354 |
|