# Hugging Face Space app: combined speech-to-text + speaker diarization (Gradio UI).
| import gradio as gr | |
| import os | |
| import tempfile | |
| from pathlib import Path | |
| import sys | |
| # Importer le module de traitement | |
| from processing import ( | |
| run_diarization, | |
| run_transcription, | |
| combine_diarization_transcription, | |
| format_output | |
| ) | |
def process_audio_stt_diarization(
    audio_file,
    diarization_model
):
    """Run the combined STT + speaker-diarization pipeline for the Gradio UI.

    Parameters
    ----------
    audio_file : str | tuple | file-like | None
        Audio input as handed over by ``gr.Audio``: a filepath string, a
        tuple (whose second element is the path), or an object exposing a
        ``.name`` attribute.
    diarization_model : str
        Hugging Face model id forwarded to ``run_diarization``.

    Returns
    -------
    tuple[str | None, str]
        ``(path_to_transcription_txt, markdown_summary)`` on success, or
        ``(None, error_message)`` on any failure.
    """
    if audio_file is None:
        return None, "❌ Veuillez uploader un fichier audio"
    try:
        # Normalize the various shapes Gradio may hand us into a plain path.
        if isinstance(audio_file, tuple):
            audio_path = audio_file[1] if len(audio_file) > 1 else audio_file[0]
        elif isinstance(audio_file, str):
            audio_path = audio_file
        elif hasattr(audio_file, 'name'):
            audio_path = audio_file.name
        else:
            audio_path = str(audio_file)
        if not os.path.exists(audio_path):
            return None, f"❌ Fichier audio introuvable: {audio_path}"
        # The pyannote/Whisper models are gated: an HF token is required.
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            return None, "❌ Token Hugging Face non configuré (HF_TOKEN)"
        # Step 1: speaker diarization.
        try:
            diarization_segments = run_diarization(
                audio_path,
                hf_token,
                model_name=diarization_model
            )
        except Exception as e:
            return None, f"❌ Erreur lors de la diarisation: {str(e)}"
        # Step 2: transcription.
        try:
            transcription_segments = run_transcription(
                audio_path,
                hf_token=hf_token
            )
        except Exception as e:
            return None, f"❌ Erreur lors de la transcription: {str(e)}"
        # Step 3: merge diarization turns with transcription segments.
        try:
            combined = combine_diarization_transcription(
                diarization_segments,
                transcription_segments
            )
        except Exception as e:
            return None, f"❌ Erreur lors de la combinaison: {str(e)}"
        # Step 4: render the "Speaker X : text" output.
        formatted_text = format_output(combined)
        # BUG FIX: the file used to be written inside a
        # tempfile.TemporaryDirectory() block, which deletes the directory
        # (and the file) on exit — the path returned to Gradio was dangling
        # by the time the File component tried to serve it.  Write to a
        # persistent temp file instead.
        fd, output_file = tempfile.mkstemp(prefix="transcription_", suffix=".txt")
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write(formatted_text)
        # Per-speaker statistics for the Markdown summary.
        from collections import defaultdict
        speaker_stats = defaultdict(lambda: {"total_duration": 0.0, "num_segments": 0, "text_length": 0})
        for seg in combined:
            speaker = seg["speaker"]
            duration = seg["end"] - seg["start"]
            speaker_stats[speaker]["total_duration"] += duration
            speaker_stats[speaker]["num_segments"] += 1
            speaker_stats[speaker]["text_length"] += len(seg["text"])
        summary = f"""
# Résultats STT + Diarization
**Fichier:** {Path(audio_path).name}
**Modèle diarization:** {diarization_model}
**Modèle STT:** bofenghuang/whisper-large-v3-french
**Locuteurs détectés:** {len(speaker_stats)}
**Segments combinés:** {len(combined)}
## Statistiques par locuteur
"""
        for speaker, stats in sorted(speaker_stats.items()):
            # Map "SPEAKER_00" -> "Speaker A".  The previous code crashed on
            # labels that were not exactly "SPEAKER_<int>" and produced
            # punctuation characters past 26 speakers; fall back gracefully.
            try:
                speaker_num = int(str(speaker).rsplit("_", 1)[-1])
            except ValueError:
                speaker_name = str(speaker)
            else:
                if speaker_num < 26:
                    speaker_name = f"Speaker {chr(65 + speaker_num)}"
                else:
                    speaker_name = f"Speaker {speaker_num + 1}"
            avg_duration = stats["total_duration"] / stats["num_segments"] if stats["num_segments"] > 0 else 0
            summary += f"\n- **{speaker_name}**: {stats['num_segments']} segments, {stats['total_duration']:.2f}s total, {avg_duration:.2f}s moyenne/segment, {stats['text_length']} caractères"
        return output_file, summary
    except Exception as e:
        # Last-resort handler: surface the traceback to the UI instead of
        # letting Gradio show an opaque error.
        import traceback
        error_details = traceback.format_exc()
        error_msg = f"""❌ **Erreur lors du traitement**
**Message:** {str(e)}
**Détails techniques:**
```
{error_details}
```
**Solutions possibles:**
- Vérifiez que le fichier audio est valide
- Assurez-vous que le token HF_TOKEN est configuré dans les secrets de la Space
- Réessayez avec un fichier audio plus court
"""
        return None, error_msg
# Gradio interface.  The layout is declarative: nesting of Row/Column context
# managers and statement order define the rendered UI, so the structure below
# must not be reordered.
with gr.Blocks(title="Gilbert - STT + Diarization") as demo:
    # Header / usage instructions shown at the top of the page.
    gr.Markdown("""
# 🎤 Gilbert - STT + Diarization
Pipeline complet de transcription (STT) et diarisation de locuteurs.
**Fonctionnalités:**
- 🎤 Diarisation de locuteurs avec pyannote.audio
- 📝 Transcription avec Whisper Large V3 French (fine-tuné pour le français)
- 🔗 Combinaison automatique pour une sortie formatée: "Speaker A : texte"
**Instructions:**
1. Uploadez un fichier audio (WAV, MP3, M4A)
2. Configurez les paramètres de diarisation (optionnel)
3. Cliquez sur "Traiter"
4. Téléchargez la transcription avec identification des locuteurs
""")
    with gr.Row():
        with gr.Column():
            # Left column: inputs (audio upload + diarization model choice).
            audio_input = gr.Audio(
                label="Fichier audio",
                type="filepath"
            )
            diarization_model = gr.Dropdown(
                choices=[
                    "pyannote/speaker-diarization-community-1",
                    "pyannote/speaker-diarization-3.1",
                ],
                value="pyannote/speaker-diarization-community-1",
                label="Modèle de diarisation"
            )
            process_btn = gr.Button("🚀 Traiter", variant="primary")
        with gr.Column():
            # Right column: outputs (Markdown summary + downloadable file).
            summary_output = gr.Markdown(label="Résumé")
            transcription_output = gr.File(
                label="Transcription (format: Speaker A : texte)",
                type="filepath"
            )
    # Wire the button to the processing pipeline; outputs map positionally to
    # the (file_path, summary) tuple returned by process_audio_stt_diarization.
    process_btn.click(
        fn=process_audio_stt_diarization,
        inputs=[audio_input, diarization_model],
        outputs=[transcription_output, summary_output]
    )
    # Footer: token requirement and model credits.
    gr.Markdown("""
---
**Note:** Vous devez avoir un token Hugging Face configuré avec accès aux modèles pyannote et Whisper.
Configurez-le dans les secrets de la Space avec: `HF_TOKEN="votre_token"`
**Modèles utilisés:**
- **Diarization**: pyannote/speaker-diarization-community-1 (ou 3.1)
- **STT**: bofenghuang/whisper-large-v3-french (Whisper Large V3 fine-tuné pour le français)
""")

# Script entry point: launch the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()