Ratnesh-dev committed on
Commit
c7d2aa0
·
1 Parent(s): 035bf47

Add Schema Documentation To API

Browse files
Files changed (2) hide show
  1. README.md +32 -1
  2. app.py +121 -0
README.md CHANGED
@@ -25,12 +25,35 @@ Model setup is global/outside `@spaces.GPU` so setup time is not billed to ZeroG
25
  ## API routes
26
  - `/run_complete_pipeline`
27
  - `/get_debug_output`
 
28
 
29
  ## `/run_complete_pipeline` inputs
30
  - `audio_file` (file path from Gradio client upload)
31
  - `huggingface_token`
32
 
33
- Returns: merged transcript JSON only.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  ## `/get_debug_output` inputs
36
  - `run_id` (optional)
@@ -41,6 +64,14 @@ Returns: raw/debug payload for the latest run (or specific run if provided), inc
41
  - merged transcript payload
42
  - aggregated timing
43
 
 
 
 
 
 
 
 
 
44
  ## IPython example
45
  ```python
46
  from gradio_client import Client, handle_file
 
25
  ## API routes
26
  - `/run_complete_pipeline`
27
  - `/get_debug_output`
28
+ - `/get_run_complete_pipeline_schema`
29
 
30
  ## `/run_complete_pipeline` inputs
31
  - `audio_file` (file path from Gradio client upload)
32
  - `huggingface_token`
33
 
34
+ Returns: merged transcript JSON only, with this structure:
35
+
36
+ ```json
37
+ {
38
+ "summary": {
39
+ "diarization_key_used": "exclusive_speaker_diarization",
40
+ "parakeet_word_count": 1234,
41
+ "pyannote_segment_count": 42,
42
+ "turn_count": 39,
43
+ "assigned_word_count": 1219,
44
+ "unassigned_word_count": 15
45
+ },
46
+ "turns": [
47
+ {
48
+ "speaker": "SPEAKER_00",
49
+ "start": 0.0,
50
+ "end": 12.34,
51
+ "text": "Good morning and welcome to the earnings call."
52
+ }
53
+ ],
54
+ "transcript_text": "[0.00 - 12.34] SPEAKER_00: Good morning and welcome to the earnings call."
55
+ }
56
+ ```
57
 
58
  ## `/get_debug_output` inputs
59
  - `run_id` (optional)
 
64
  - merged transcript payload
65
  - aggregated timing
66
 
67
+ ## `/get_run_complete_pipeline_schema` inputs
68
+ - none
69
+
70
+ Returns a machine-readable contract for `/run_complete_pipeline` (alongside `api_name` and `notes` fields), including:
71
+ - `input_schema`
72
+ - `output_schema`
73
+ - `output_example`
74
+
75
  ## IPython example
76
  ```python
77
  from gradio_client import Client, handle_file
app.py CHANGED
@@ -26,6 +26,82 @@ _DEBUG_RUNS: "OrderedDict[str, dict[str, Any]]" = OrderedDict()
26
  _MAX_DEBUG_RUNS = 10
27
  _LAST_DEBUG_RUN_ID: str | None = None
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def _preload_model(model_label: str, preload_fn) -> None:
31
  try:
@@ -193,6 +269,19 @@ def get_debug_output(run_id: str | None):
193
  return {"run_id": _LAST_DEBUG_RUN_ID, "debug": _DEBUG_RUNS[_LAST_DEBUG_RUN_ID]}
194
 
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
197
  gr.Markdown(
198
  "# End-to-end transcript pipeline\n"
@@ -216,6 +305,23 @@ with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
216
  inputs=[audio_file, huggingface_token],
217
  outputs=output,
218
  api_name="run_complete_pipeline",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  )
220
 
221
  with gr.Row():
@@ -228,6 +334,21 @@ with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
228
  inputs=[debug_run_id],
229
  outputs=debug_output,
230
  api_name="get_debug_output",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  )
232
 
233
 
 
26
  _MAX_DEBUG_RUNS = 10
27
  _LAST_DEBUG_RUN_ID: str | None = None
28
 
29
+ RUN_COMPLETE_PIPELINE_OUTPUT_SCHEMA: dict[str, Any] = {
30
+ "type": "object",
31
+ "description": "Merged transcript output from Parakeet (word timestamps) and Pyannote diarization.",
32
+ "properties": {
33
+ "summary": {
34
+ "type": "object",
35
+ "properties": {
36
+ "diarization_key_used": {"type": "string", "example": "exclusive_speaker_diarization"},
37
+ "parakeet_word_count": {"type": "integer"},
38
+ "pyannote_segment_count": {"type": "integer"},
39
+ "turn_count": {"type": "integer"},
40
+ "assigned_word_count": {"type": "integer"},
41
+ "unassigned_word_count": {"type": "integer"},
42
+ },
43
+ "required": [
44
+ "diarization_key_used",
45
+ "parakeet_word_count",
46
+ "pyannote_segment_count",
47
+ "turn_count",
48
+ "assigned_word_count",
49
+ "unassigned_word_count",
50
+ ],
51
+ },
52
+ "turns": {
53
+ "type": "array",
54
+ "items": {
55
+ "type": "object",
56
+ "properties": {
57
+ "speaker": {"type": "string", "example": "SPEAKER_02"},
58
+ "start": {"type": "number", "example": 40.72},
59
+ "end": {"type": "number", "example": 514.0},
60
+ "text": {"type": "string"},
61
+ },
62
+ "required": ["speaker", "start", "end", "text"],
63
+ },
64
+ },
65
+ "transcript_text": {"type": "string"},
66
+ },
67
+ "required": ["summary", "turns", "transcript_text"],
68
+ }
69
+
70
+ RUN_COMPLETE_PIPELINE_OUTPUT_EXAMPLE: dict[str, Any] = {
71
+ "summary": {
72
+ "diarization_key_used": "exclusive_speaker_diarization",
73
+ "parakeet_word_count": 1234,
74
+ "pyannote_segment_count": 42,
75
+ "turn_count": 39,
76
+ "assigned_word_count": 1219,
77
+ "unassigned_word_count": 15,
78
+ },
79
+ "turns": [
80
+ {
81
+ "speaker": "SPEAKER_00",
82
+ "start": 0.0,
83
+ "end": 12.34,
84
+ "text": "Good morning and welcome to the earnings call.",
85
+ },
86
+ {
87
+ "speaker": "SPEAKER_01",
88
+ "start": 12.34,
89
+ "end": 19.02,
90
+ "text": "Thank you. Let us begin with quarterly highlights.",
91
+ },
92
+ ],
93
+ "transcript_text": "[0.00 - 12.34] SPEAKER_00: Good morning ...",
94
+ }
95
+
96
+ RUN_COMPLETE_PIPELINE_INPUT_SCHEMA: dict[str, Any] = {
97
+ "type": "object",
98
+ "properties": {
99
+ "audio_file": {"type": "file", "description": "Audio file upload"},
100
+ "huggingface_token": {"type": "string", "description": "HF access token for pyannote model"},
101
+ },
102
+ "required": ["audio_file", "huggingface_token"],
103
+ }
104
+
105
 
106
  def _preload_model(model_label: str, preload_fn) -> None:
107
  try:
 
269
  return {"run_id": _LAST_DEBUG_RUN_ID, "debug": _DEBUG_RUNS[_LAST_DEBUG_RUN_ID]}
270
 
271
 
272
+ def get_run_complete_pipeline_schema() -> dict[str, Any]:
273
+ return {
274
+ "api_name": "/run_complete_pipeline",
275
+ "input_schema": RUN_COMPLETE_PIPELINE_INPUT_SCHEMA,
276
+ "output_schema": RUN_COMPLETE_PIPELINE_OUTPUT_SCHEMA,
277
+ "output_example": RUN_COMPLETE_PIPELINE_OUTPUT_EXAMPLE,
278
+ "notes": [
279
+ "Use /get_debug_output to fetch raw model payloads and timing.",
280
+ "The production route returns only merged transcript JSON.",
281
+ ],
282
+ }
283
+
284
+
285
  with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
286
  gr.Markdown(
287
  "# End-to-end transcript pipeline\n"
 
305
  inputs=[audio_file, huggingface_token],
306
  outputs=output,
307
  api_name="run_complete_pipeline",
308
+ api_description=(
309
+ "Run Parakeet + Pyannote and return merged transcript JSON.\n"
310
+ "Response shape:\n"
311
+ "{\n"
312
+ ' "summary": {\n'
313
+ ' "diarization_key_used": str,\n'
314
+ ' "parakeet_word_count": int,\n'
315
+ ' "pyannote_segment_count": int,\n'
316
+ ' "turn_count": int,\n'
317
+ ' "assigned_word_count": int,\n'
318
+ ' "unassigned_word_count": int\n'
319
+ " },\n"
320
+ ' "turns": [{"speaker": str, "start": float, "end": float, "text": str}],\n'
321
+ ' "transcript_text": str\n'
322
+ "}\n"
323
+ "For full machine-readable schema + example, call /get_run_complete_pipeline_schema."
324
+ ),
325
  )
326
 
327
  with gr.Row():
 
334
  inputs=[debug_run_id],
335
  outputs=debug_output,
336
  api_name="get_debug_output",
337
+ api_description=(
338
+ "Return latest (or selected) debug payload including raw Parakeet/Pyannote outputs "
339
+ "and aggregated pipeline timing."
340
+ ),
341
+ )
342
+
343
+ with gr.Row(visible=False):
344
+ schema_btn = gr.Button("get_run_complete_pipeline_schema")
345
+ schema_output = gr.JSON(label="run_complete_pipeline schema", visible=False)
346
+ schema_btn.click(
347
+ fn=get_run_complete_pipeline_schema,
348
+ inputs=None,
349
+ outputs=schema_output,
350
+ api_name="get_run_complete_pipeline_schema",
351
+ api_description="Return input/output schema contract for /run_complete_pipeline.",
352
  )
353
 
354