Spaces:

Ratnesh-dev
/

transcribe-diarize

Running

App Files Files Community

Ratnesh-dev committed on Feb 25

Commit

c7d2aa0

1 Parent(s): 035bf47

Add Schema Documentation To API

Browse files

Files changed (2) hide show

README.md +32 -1
app.py +121 -0

README.md CHANGED Viewed

@@ -25,12 +25,35 @@ Model setup is global/outside `@spaces.GPU` so setup time is not billed to ZeroG
 ## API routes
 - `/run_complete_pipeline`
 - `/get_debug_output`
 ## `/run_complete_pipeline` inputs
 - `audio_file` (file path from Gradio client upload)
 - `huggingface_token`
-Returns: merged transcript JSON only.
 ## `/get_debug_output` inputs
 - `run_id` (optional)
@@ -41,6 +64,14 @@ Returns: raw/debug payload for the latest run (or specific run if provided), inc
 - merged transcript payload
 - aggregated timing
 ## IPython example
 ```python
 from gradio_client import Client, handle_file

 ## API routes
 - `/run_complete_pipeline`
 - `/get_debug_output`
+- `/get_run_complete_pipeline_schema`
 ## `/run_complete_pipeline` inputs
 - `audio_file` (file path from Gradio client upload)
 - `huggingface_token`
+Returns: merged transcript JSON only, with this structure:
+```json
+{
+  "summary": {
+    "diarization_key_used": "exclusive_speaker_diarization",
+    "parakeet_word_count": 1234,
+    "pyannote_segment_count": 42,
+    "turn_count": 39,
+    "assigned_word_count": 1219,
+    "unassigned_word_count": 15
+  },
+  "turns": [
+    {
+      "speaker": "SPEAKER_00",
+      "start": 0.0,
+      "end": 12.34,
+      "text": "Good morning and welcome to the earnings call."
+    }
+  ],
+  "transcript_text": "[0.00 - 12.34] SPEAKER_00: Good morning and welcome to the earnings call."
+}
+```
 ## `/get_debug_output` inputs
 - `run_id` (optional)
 - merged transcript payload
 - aggregated timing
+## `/get_run_complete_pipeline_schema` inputs
+- none
+Returns a machine-readable contract for `/run_complete_pipeline`:
+- `input_schema`
+- `output_schema`
+- `output_example`
 ## IPython example
 ```python
 from gradio_client import Client, handle_file

app.py CHANGED Viewed

@@ -26,6 +26,82 @@ _DEBUG_RUNS: "OrderedDict[str, dict[str, Any]]" = OrderedDict()
 _MAX_DEBUG_RUNS = 10
 _LAST_DEBUG_RUN_ID: str | None = None
 def _preload_model(model_label: str, preload_fn) -> None:
     try:
@@ -193,6 +269,19 @@ def get_debug_output(run_id: str | None):
     return {"run_id": _LAST_DEBUG_RUN_ID, "debug": _DEBUG_RUNS[_LAST_DEBUG_RUN_ID]}
 with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
     gr.Markdown(
         "# End-to-end transcript pipeline\n"
@@ -216,6 +305,23 @@ with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
         inputs=[audio_file, huggingface_token],
         outputs=output,
         api_name="run_complete_pipeline",
     )
     with gr.Row():
@@ -228,6 +334,21 @@ with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
         inputs=[debug_run_id],
         outputs=debug_output,
         api_name="get_debug_output",
     )

 _MAX_DEBUG_RUNS = 10
 _LAST_DEBUG_RUN_ID: str | None = None
+# JSON-Schema-style description of the object returned by /run_complete_pipeline.
+# The top level requires three keys: "summary" (integer counters plus the
+# diarization key that was used), "turns" (array of speaker/start/end/text
+# objects) and "transcript_text" (a string). Served to clients under the
+# "output_schema" key by get_run_complete_pipeline_schema().
+RUN_COMPLETE_PIPELINE_OUTPUT_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "description": "Merged transcript output from Parakeet (word timestamps) and Pyannote diarization.",
+    "properties": {
+        # Aggregate counters for the run; every field listed here is required.
+        "summary": {
+            "type": "object",
+            "properties": {
+                "diarization_key_used": {"type": "string", "example": "exclusive_speaker_diarization"},
+                "parakeet_word_count": {"type": "integer"},
+                "pyannote_segment_count": {"type": "integer"},
+                "turn_count": {"type": "integer"},
+                "assigned_word_count": {"type": "integer"},
+                "unassigned_word_count": {"type": "integer"},
+            },
+            "required": [
+                "diarization_key_used",
+                "parakeet_word_count",
+                "pyannote_segment_count",
+                "turn_count",
+                "assigned_word_count",
+                "unassigned_word_count",
+            ],
+        },
+        # Each item carries a speaker label, numeric start/end, and the text;
+        # all four fields are required.
+        "turns": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "speaker": {"type": "string", "example": "SPEAKER_02"},
+                    "start": {"type": "number", "example": 40.72},
+                    "end": {"type": "number", "example": 514.0},
+                    "text": {"type": "string"},
+                },
+                "required": ["speaker", "start", "end", "text"],
+            },
+        },
+        "transcript_text": {"type": "string"},
+    },
+    "required": ["summary", "turns", "transcript_text"],
+}
+# Hard-coded sample response with the same top-level keys that
+# RUN_COMPLETE_PIPELINE_OUTPUT_SCHEMA marks as required ("summary", "turns",
+# "transcript_text"). Served to clients under the "output_example" key by
+# get_run_complete_pipeline_schema().
+RUN_COMPLETE_PIPELINE_OUTPUT_EXAMPLE: dict[str, Any] = {
+    "summary": {
+        "diarization_key_used": "exclusive_speaker_diarization",
+        "parakeet_word_count": 1234,
+        "pyannote_segment_count": 42,
+        "turn_count": 39,
+        "assigned_word_count": 1219,
+        "unassigned_word_count": 15,
+    },
+    "turns": [
+        {
+            "speaker": "SPEAKER_00",
+            "start": 0.0,
+            "end": 12.34,
+            "text": "Good morning and welcome to the earnings call.",
+        },
+        {
+            "speaker": "SPEAKER_01",
+            "start": 12.34,
+            "end": 19.02,
+            "text": "Thank you. Let us begin with quarterly highlights.",
+        },
+    ],
+    # Abbreviated with "..." here; it does not spell out both turns above.
+    "transcript_text": "[0.00 - 12.34] SPEAKER_00: Good morning ...",
+}
+# Schema-style description of the two inputs accepted by /run_complete_pipeline;
+# both are listed as required. Served to clients under the "input_schema" key
+# by get_run_complete_pipeline_schema().
+RUN_COMPLETE_PIPELINE_INPUT_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "properties": {
+        # NOTE(review): "file" is not one of the standard JSON Schema primitive
+        # types (string/number/integer/boolean/object/array/null). A client that
+        # feeds this dict to a strict JSON Schema validator may reject it —
+        # confirm whether this is meant as an informal contract only.
+        "audio_file": {"type": "file", "description": "Audio file upload"},
+        "huggingface_token": {"type": "string", "description": "HF access token for pyannote model"},
+    },
+    "required": ["audio_file", "huggingface_token"],
+}
 def _preload_model(model_label: str, preload_fn) -> None:
     try:
     return {"run_id": _LAST_DEBUG_RUN_ID, "debug": _DEBUG_RUNS[_LAST_DEBUG_RUN_ID]}
+def get_run_complete_pipeline_schema() -> dict[str, Any]:
+    """Return the input/output contract for the ``/run_complete_pipeline`` route.
+
+    Takes no arguments. The result is a dict with the keys ``api_name``,
+    ``input_schema``, ``output_schema``, ``output_example`` and ``notes``.
+    The schema and example values are the module-level constants themselves,
+    not copies, so callers should treat the returned payload as read-only.
+    """
+    return {
+        "api_name": "/run_complete_pipeline",
+        "input_schema": RUN_COMPLETE_PIPELINE_INPUT_SCHEMA,
+        "output_schema": RUN_COMPLETE_PIPELINE_OUTPUT_SCHEMA,
+        "output_example": RUN_COMPLETE_PIPELINE_OUTPUT_EXAMPLE,
+        # Free-form hints for API consumers about the sibling debug route.
+        "notes": [
+            "Use /get_debug_output to fetch raw model payloads and timing.",
+            "The production route returns only merged transcript JSON.",
+        ],
+    }
 with gr.Blocks(title="Parakeet + Pyannote Pipeline") as demo:
     gr.Markdown(
         "# End-to-end transcript pipeline\n"
         inputs=[audio_file, huggingface_token],
         outputs=output,
         api_name="run_complete_pipeline",
+        api_description=(
+            "Run Parakeet + Pyannote and return merged transcript JSON.\n"
+            "Response shape:\n"
+            "{\n"
+            '  "summary": {\n'
+            '    "diarization_key_used": str,\n'
+            '    "parakeet_word_count": int,\n'
+            '    "pyannote_segment_count": int,\n'
+            '    "turn_count": int,\n'
+            '    "assigned_word_count": int,\n'
+            '    "unassigned_word_count": int\n'
+            "  },\n"
+            '  "turns": [{"speaker": str, "start": float, "end": float, "text": str}],\n'
+            '  "transcript_text": str\n'
+            "}\n"
+            "For full machine-readable schema + example, call /get_run_complete_pipeline_schema."
+        ),
     )
     with gr.Row():
         inputs=[debug_run_id],
         outputs=debug_output,
         api_name="get_debug_output",
+        api_description=(
+            "Return latest (or selected) debug payload including raw Parakeet/Pyannote outputs "
+            "and aggregated pipeline timing."
+        ),
+    )
+    with gr.Row(visible=False):
+        schema_btn = gr.Button("get_run_complete_pipeline_schema")
+    schema_output = gr.JSON(label="run_complete_pipeline schema", visible=False)
+    schema_btn.click(
+        fn=get_run_complete_pipeline_schema,
+        inputs=None,
+        outputs=schema_output,
+        api_name="get_run_complete_pipeline_schema",
+        api_description="Return input/output schema contract for /run_complete_pipeline.",
     )