| | import gradio as gr |
| | import os |
| | import tempfile |
| | from pathlib import Path |
| | import sys |
| |
|
| | |
| | from diarization_pyannote_demo import run_pyannote_diarization, write_rttm, write_json |
| |
|
| | def diarize_audio(audio_file, model_name, num_speakers, min_speakers, max_speakers, use_exclusive): |
| | """Interface Gradio pour la diarisation pyannote.""" |
| | |
| | if audio_file is None: |
| | return None, None, "❌ Veuillez uploader un fichier audio" |
| | |
| | try: |
| | |
| | if isinstance(audio_file, tuple): |
| | |
| | audio_path = audio_file[1] if len(audio_file) > 1 else audio_file[0] |
| | elif isinstance(audio_file, str): |
| | |
| | audio_path = audio_file |
| | elif hasattr(audio_file, 'name'): |
| | |
| | audio_path = audio_file.name |
| | else: |
| | |
| | audio_path = str(audio_file) |
| | |
| | if not os.path.exists(audio_path): |
| | return None, None, f"❌ Fichier audio introuvable: {audio_path}" |
| | |
| | |
| | with tempfile.TemporaryDirectory() as tmpdir: |
| | |
| | result = run_pyannote_diarization( |
| | audio_path, |
| | output_dir=tmpdir, |
| | model_name=model_name, |
| | num_speakers=num_speakers if num_speakers > 0 else None, |
| | min_speakers=min_speakers if min_speakers > 0 else None, |
| | max_speakers=max_speakers if max_speakers > 0 else None, |
| | use_exclusive=use_exclusive, |
| | show_progress=False |
| | ) |
| | |
| | |
| | audio_name = Path(audio_path).stem |
| | rttm_path = os.path.join(tmpdir, f"{audio_name}.rttm") |
| | json_path = os.path.join(tmpdir, f"{audio_name}.json") |
| | |
| | write_rttm(result["segments"], rttm_path, audio_name) |
| | write_json(result["segments"], json_path) |
| | |
| | |
| | summary = f""" |
| | # Résultats de diarisation |
| | |
| | **Fichier:** {Path(audio_path).name} |
| | **Modèle:** {model_name} |
| | **Locuteurs détectés:** {result['num_speakers']} |
| | **Segments:** {len(result['segments'])} |
| | **Durée totale:** {result.get('duration', 0):.2f} secondes |
| | |
| | ## Statistiques par locuteur |
| | """ |
| | from collections import defaultdict |
| | speaker_stats = defaultdict(lambda: {"total_duration": 0.0, "num_segments": 0}) |
| | for seg in result["segments"]: |
| | speaker = seg["speaker"] |
| | duration = seg["end"] - seg["start"] |
| | speaker_stats[speaker]["total_duration"] += duration |
| | speaker_stats[speaker]["num_segments"] += 1 |
| | |
| | for speaker, stats in sorted(speaker_stats.items()): |
| | avg_duration = stats["total_duration"] / stats["num_segments"] if stats["num_segments"] > 0 else 0 |
| | summary += f"\n- **{speaker}**: {stats['num_segments']} segments, {stats['total_duration']:.2f}s total, {avg_duration:.2f}s moyenne/segment" |
| | |
| | |
| | return rttm_path, json_path, summary |
| | |
| | except Exception as e: |
| | import traceback |
| | error_details = traceback.format_exc() |
| | error_msg = f"""❌ **Erreur lors de la diarisation** |
| | |
| | **Message:** {str(e)} |
| | |
| | **Détails techniques:** |
| | ``` |
| | {error_details} |
| | ``` |
| | |
| | **Solutions possibles:** |
| | - Vérifiez que le fichier audio est valide |
| | - Assurez-vous que le token HF_TOKEN est configuré dans les secrets de la Space |
| | - Réessayez avec un fichier audio plus court |
| | """ |
| | return None, None, error_msg |
| |
|
| | |
# Gradio UI: upload audio, optionally constrain the speaker count, run
# diarization via diarize_audio, and expose the RTTM/JSON results for download.
with gr.Blocks(title="Gilbert - Diarisation pyannote") as demo:
    gr.Markdown("""
    # 🎤 Gilbert - Diarisation pyannote

    Interface pour la diarisation de locuteurs avec pyannote.audio

    **Instructions:**
    1. Uploadez un fichier audio (WAV, MP3, M4A)
    2. Configurez les paramètres (optionnel)
    3. Cliquez sur "Diariser"
    4. Téléchargez les résultats (RTTM et JSON)
    """)

    with gr.Row():
        with gr.Column():
            # type="filepath" makes Gradio pass a path string to the callback.
            audio_input = gr.Audio(
                label="Fichier audio",
                type="filepath"
            )

            model_name = gr.Dropdown(
                choices=[
                    "pyannote/speaker-diarization-3.1",
                    "pyannote/speaker-diarization-community-1",
                ],
                value="pyannote/speaker-diarization-3.1",
                label="Modèle pyannote"
            )

            with gr.Row():
                # FIX: precision=0 forces integer values; without it gr.Number
                # emits floats (e.g. 2.0) for what are semantically speaker counts.
                num_speakers = gr.Number(
                    label="Nombre exact de locuteurs",
                    value=0,
                    minimum=0,
                    precision=0,
                    info="0 = auto-détection"
                )
                min_speakers = gr.Number(
                    label="Min locuteurs",
                    value=0,
                    minimum=0,
                    precision=0,
                    info="0 = pas de limite"
                )
                max_speakers = gr.Number(
                    label="Max locuteurs",
                    value=0,
                    minimum=0,
                    precision=0,
                    info="0 = pas de limite"
                )

            use_exclusive = gr.Checkbox(
                label="Exclusive speaker diarization",
                value=False,
                info="Simplifie la réconciliation avec transcription"
            )

            diarize_btn = gr.Button("🎯 Diariser", variant="primary")

        with gr.Column():
            summary_output = gr.Markdown(label="Résumé")
            rttm_output = gr.File(label="Fichier RTTM", type="filepath")
            json_output = gr.File(label="Fichier JSON", type="filepath")

    # Wire the button to the diarization callback; output order matches
    # diarize_audio's (rttm_path, json_path, summary) return tuple.
    diarize_btn.click(
        fn=diarize_audio,
        inputs=[audio_input, model_name, num_speakers, min_speakers, max_speakers, use_exclusive],
        outputs=[rttm_output, json_output, summary_output]
    )

    gr.Markdown("""
    ---
    **Note:** Vous devez avoir un token Hugging Face configuré avec accès aux modèles pyannote.
    Configurez-le avec: `export HF_TOKEN="votre_token"`
    """)
| |
|
if __name__ == "__main__":
    # Start the Gradio server when run as a script (no-op on import).
    demo.launch()
| |
|
| |
|