ahmad walidurosyad Claude committed on
Commit
b02904a
·
1 Parent(s): 5ea2528

Add user-selectable model UI with DiariZen support

Browse files

- Add dropdown UI for model selection (4 models available)
- Support DiariZen WavLM Large/Base/MLC models (no token required)
- Support Pyannote 3.1 model (requires HF_TOKEN)
- Implement model caching for performance
- Add status messages and model info display
- Improve error handling and user feedback
- Fix: DiariZen models now use correct API (DiariZenPipeline)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +203 -66
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,86 +1,223 @@
1
- import spaces
2
  import gradio as gr
 
 
 
3
  from gryannote_audio import AudioLabeling
4
  from gryannote_rttm import RTTM
5
- from pyannote.audio import Pipeline
6
- import os
7
- import torch
8
-
9
- @spaces.GPU(duration=120)
10
- def apply_pipeline(audio):
11
- """Apply specified pipeline on the indicated audio file"""
12
- pipeline = Pipeline.from_pretrained("BUT-FIT/diarizen-wavlm-large-s80-md", use_auth_token=os.environ["HF_TOKEN"])
13
- pipeline.to(torch.device("cuda"))
14
- annotations = pipeline(audio)
15
-
16
- return ((audio, annotations), annotations)
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- def update_annotations(data):
20
- return rttm.on_edit(data)
21
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- with gr.Blocks() as demo:
24
  with gr.Row():
25
- with gr.Column():
26
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  with gr.Row():
28
- with gr.Column(scale=1):
29
- gr.Markdown(
30
- '<a href="https://github.com/clement-pages/gryannote"><img src="https://github.com/clement-pages/gryannote/blob/main/docs/assets/logo-gryannote.png?raw=true" alt="gryannote logo" width="140"/></a>',
31
- )
32
- with gr.Column(scale=10):
33
- gr.Markdown('<h1 style="font-size: 4em;">gryannote</h1>')
34
- gr.Markdown()
35
- gr.Markdown('<h2 style="font-size: 2em;">Make the audio labeling process easier and faster! </h2>')
36
-
37
- with gr.Tab("application"):
38
- gr.Markdown(
39
- "To use the component, start by loading or recording audio."
40
- "Then apply the diarization pipeline (here [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1))"
41
- "or double-click directly on the waveform to add an annotations. The annotations produced can be edited."
42
- " You can also use keyboard shortcuts to speed things up! Click on the help button to see all the available shortcuts."
43
- " Finally, annotations can be saved by cliking on the downloading button in the RTTM component."
44
- )
45
- gr.Markdown()
46
- gr.Markdown()
47
- audio_labeling = AudioLabeling(
48
- type="filepath",
49
- interactive=True,
50
- )
51
-
52
- gr.Markdown()
53
- gr.Markdown()
54
-
55
- run_btn = gr.Button("Run pipeline")
56
-
57
- rttm = RTTM()
58
-
59
- with gr.Tab("poster"):
60
- gr.Markdown(
61
- '<p align="center"><img src="https://github.com/clement-pages/gryannote/blob/main/docs/assets/poster-interspeech.jpg?raw=true" alt="gryannote poster" width=700em/></p>'
62
- )
 
 
 
 
 
 
 
 
 
63
 
 
 
 
 
 
 
 
64
  run_btn.click(
65
  fn=apply_pipeline,
66
- inputs=audio_labeling,
67
- outputs=[audio_labeling, rttm],
68
  )
69
 
70
- audio_labeling.edit(
71
- fn=update_annotations,
72
- inputs=audio_labeling,
73
- outputs=rttm,
74
- preprocess=False,
75
- postprocess=False,
76
  )
77
 
78
- rttm.upload(
79
- fn=audio_labeling.load_annotations,
80
- inputs=[audio_labeling, rttm],
81
- outputs=audio_labeling,
 
82
  )
83
 
 
 
 
 
 
84
 
85
  if __name__ == "__main__":
86
  demo.launch()
 
 
1
  import gradio as gr
2
+ import torch
3
+ import os
4
+ import spaces
5
  from gryannote_audio import AudioLabeling
6
  from gryannote_rttm import RTTM
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# Cache of already-constructed pipelines, keyed by model id, so each model
# is loaded from the Hub at most once per process.
model_cache = {}

# Registry of user-selectable diarization models. Each entry carries the
# Hub id, which loader to use ("diarizen" vs "pyannote"), whether an
# HF token is required, and metadata shown in the UI info panel.
AVAILABLE_MODELS = {
    "DiariZen WavLM Large (Recommended)": {
        "id": "BUT-FIT/diarizen-wavlm-large-s80-md",
        "type": "diarizen",
        "requires_token": False,
        "speed": "Fast",
        "quality": "High",
        "description": "Optimized 63M parameter model with excellent performance",
    },
    "DiariZen WavLM Base": {
        "id": "BUT-FIT/diarizen-wavlm-base-s80-md",
        "type": "diarizen",
        "requires_token": False,
        "speed": "Very Fast",
        "quality": "Good",
        "description": "Lighter model for faster inference",
    },
    "DiariZen WavLM Large MLC": {
        "id": "BUT-FIT/diarizen-wavlm-large-s80-mlc",
        "type": "diarizen",
        "requires_token": False,
        "speed": "Fast",
        "quality": "High",
        "description": "Multi-language optimized variant",
    },
    "Pyannote 3.1": {
        "id": "pyannote/speaker-diarization-3.1",
        "type": "pyannote",
        "requires_token": True,
        "speed": "Medium",
        "quality": "High",
        "description": "Original pyannote model (requires HF token)",
    },
}
45
+
46
def load_pipeline(model_name):
    """Load (or fetch from cache) the diarization pipeline for *model_name*.

    Parameters
    ----------
    model_name : str
        A key of ``AVAILABLE_MODELS``.

    Returns
    -------
    tuple
        ``(pipeline, message)`` where ``pipeline`` is the loaded pipeline
        or ``None`` on failure, and ``message`` is a human-readable status
        string (``None`` on a cache hit).
    """
    model_config = AVAILABLE_MODELS[model_name]
    model_id = model_config["id"]

    # Reuse a previously loaded pipeline if available.
    if model_id in model_cache:
        return model_cache[model_id], None

    try:
        if model_config["type"] == "diarizen":
            # DiariZen models are public on the Hub: no auth token needed.
            from diarizen.pipelines.inference import DiariZenPipeline
            pipeline = DiariZenPipeline.from_pretrained(model_id)

        elif model_config["type"] == "pyannote":
            from pyannote.audio import Pipeline

            # Pyannote models are gated behind a HuggingFace token.
            if "HF_TOKEN" not in os.environ:
                return None, "⚠️ Pyannote requires HF_TOKEN in Space secrets"

            pipeline = Pipeline.from_pretrained(
                model_id,
                use_auth_token=os.environ["HF_TOKEN"]
            )

        else:
            # FIX: guard against a malformed registry entry. Previously an
            # unknown type left `pipeline` unbound, so the `pipeline.to(...)`
            # below raised UnboundLocalError, swallowed by the broad except
            # into a misleading "Error loading" message.
            return None, f"❌ Unknown model type: {model_config['type']}"

        # Move to GPU if available (no-op on CPU-only hosts).
        if torch.cuda.is_available():
            pipeline.to(torch.device("cuda"))

        # Cache so subsequent calls skip the Hub download entirely.
        model_cache[model_id] = pipeline

        return pipeline, f"✅ {model_name} loaded successfully"

    except Exception as e:
        # Surface the failure to the UI rather than crashing the event handler.
        return None, f"❌ Error loading {model_name}: {str(e)}"
83
 
84
@spaces.GPU(duration=120)
def apply_pipeline(audio, model_name):
    """Run the selected diarization model on *audio*.

    Returns a 3-tuple: the ``(audio, annotations)`` pair for the audio
    labeling widget, the raw annotations for the RTTM widget, and a
    status string. The first two entries are ``None`` whenever a step
    fails, leaving only the status message populated.
    """
    # Guard clause: nothing to diarize without audio.
    if audio is None:
        return None, None, "⚠️ Please upload or record audio first"

    # Obtain the (possibly cached) pipeline for the chosen model.
    pipeline, message = load_pipeline(model_name)
    if pipeline is None:
        # Loading failed; propagate the loader's error message to the UI.
        return None, None, message

    # Run diarization, converting any runtime failure into a status string.
    try:
        annotations = pipeline(audio)
    except Exception as e:
        return None, None, f"❌ Error during diarization: {str(e)}"

    return (audio, annotations), annotations, f"✅ Diarization complete with {model_name}"
103
+
104
def update_annotations(new_annotations):
    """Mirror annotations edited in the audio labeling widget onto the
    RTTM component, passing them through unchanged as the event output.

    NOTE(review): assigning to a component attribute outside an event
    return value may not refresh the Gradio UI — verify this sync
    actually takes effect.
    """
    # Keep the RTTM component's state aligned with the latest edits.
    rttm_obj.annotations = new_annotations
    return new_annotations
108
+
109
def load_rttm_to_audio(rttm_annotations):
    """Push annotations from an uploaded RTTM file into the audio
    labeling widget, then return the widget's resulting value."""
    # Delegate loading to the component itself and echo its value back
    # so Gradio updates the displayed waveform labels.
    audio_labeling.load_annotations(rttm_annotations)
    return audio_labeling.value
113
+
114
# Components are created un-rendered here so the helper functions above can
# reference them; they are placed into the layout via .render() below.
audio_labeling = AudioLabeling(type="filepath")
rttm_obj = RTTM()

# Build Gradio Interface
with gr.Blocks(title="GryanNote - Speaker Diarization") as demo:
    gr.Markdown("""
    # 🎙️ GryanNote - Speaker Diarization
    Label speakers in audio recordings using state-of-the-art diarization models
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # Model selection dropdown
            model_selector = gr.Dropdown(
                choices=list(AVAILABLE_MODELS.keys()),
                value="DiariZen WavLM Large (Recommended)",
                label="🤖 Select Diarization Model",
                info="Choose the model for speaker diarization"
            )

            # Model info display
            with gr.Accordion("ℹ️ Model Information", open=False):
                model_info = gr.Markdown()

            # Audio input
            audio_labeling.render()

            # Action buttons
            with gr.Row():
                run_btn = gr.Button("▶️ Run Diarization", variant="primary", size="lg")
                clear_btn = gr.Button("🗑️ Clear", size="lg")

        with gr.Column(scale=1):
            # Status message
            status_msg = gr.Textbox(
                label="📊 Status",
                interactive=False,
                lines=3
            )

            # RTTM output
            gr.Markdown("### 📝 RTTM Output")
            rttm_obj.render()

    # Footer
    gr.Markdown("""
    ---
    **Models:**
    - **DiariZen**: Optimized models by BUT-FIT, no token required
    - **Pyannote**: Original model, requires HF token in Space secrets

    **Usage:** Upload audio → Select model → Run diarization → Download/Edit annotations
    """)

    # Update model info when selection changes
    def update_model_info(model_name):
        """Render the selected model's registry metadata as markdown."""
        config = AVAILABLE_MODELS[model_name]
        info = f"""
        **Model ID:** `{config['id']}`
        **Type:** {config['type'].upper()}
        **Speed:** {config['speed']} | **Quality:** {config['quality']}
        **Token Required:** {'Yes ⚠️ (Add HF_TOKEN to Space secrets)' if config['requires_token'] else 'No ✅'}

        {config['description']}
        """
        return info

    # Initialize model info on page load.
    demo.load(
        fn=update_model_info,
        inputs=[model_selector],
        outputs=[model_info]
    )

    model_selector.change(
        fn=update_model_info,
        inputs=[model_selector],
        outputs=[model_info]
    )

    # Run pipeline button.
    # FIX: event inputs/outputs must be the component instances themselves;
    # the original passed `component.value`, which is merely the component's
    # build-time default (None), not a live reference — the wiring above
    # (demo.load / model_selector.change) already uses the correct form.
    run_btn.click(
        fn=apply_pipeline,
        inputs=[audio_labeling, model_selector],
        outputs=[audio_labeling, rttm_obj, status_msg]
    )

    # Clear button: reset both widgets and report the action.
    clear_btn.click(
        fn=lambda: (None, None, "Cleared"),
        inputs=[],
        outputs=[audio_labeling, rttm_obj, status_msg]
    )

    # Sync annotations edited on the waveform into the RTTM view.
    audio_labeling.change(
        fn=update_annotations,
        inputs=[audio_labeling],
        outputs=[rttm_obj]
    )

    # Load annotations from an uploaded RTTM file onto the waveform.
    rttm_obj.upload(
        fn=load_rttm_to_audio,
        inputs=[rttm_obj],
        outputs=[audio_labeling]
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
  gryannote==0.3.3
2
  pyannote-audio==3.3.2
 
3
  spaces==0.30.2
 
 
 
1
  gryannote==0.3.3
2
  pyannote-audio==3.3.2
3
+ diarizen
4
  spaces==0.30.2
5
+ torch
6
+ gradio