Spaces:

evannh
/

test_diarization

Sleeping

App Files Files Community

evannh committed on Jun 2, 2025

Commit

7a0150b

verified ·

1 Parent(s): c16f151

Create app.py

Browse files

Files changed (1) hide show

app.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+# app.py
+import gradio as gr
+import whisper
+import spacy
+import torch
+import os
+from pyannote.audio import Pipeline
+# Chargement des modèles
+whisper_model = whisper.load_model("base")  # medium ou large possible
+nlp = spacy.load("fr_core_news_md")
+# Diarisation avec PyAnnote (nécessite un token HF dans les Secrets du Space)
+hf_token = os.getenv("HF_TOKEN")
+if hf_token:
+    diar_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=hf_token)
+else:
+    diar_pipeline = None
+def process_audio(file):
+    result = whisper_model.transcribe(file, language="fr", verbose=False)
+    transcription = result["text"]
+    # Diarisation
+    if diar_pipeline:
+        diar_result = diar_pipeline(file)
+        diar_str = "\n".join([
+            f"{turn.start:.1f}s - {turn.end:.1f}s : {speaker}"
+            for turn, _, speaker in diar_result.itertracks(yield_label=True)
+        ])
+    else:
+        diar_str = "Diarisation non disponible (ajoutez votre HF_TOKEN dans les secrets)"
+    # NER
+    doc = nlp(transcription)
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
+    ent_str = "\n".join([f"{text} ({label})" for text, label in entities]) if entities else "Aucune entité détectée"
+    return transcription, diar_str, ent_str
+demo = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(type="filepath", label="Audio (.mp3/.wav)"),
+    outputs=[
+        gr.Textbox(label="📝 Transcription Whisper"),
+        gr.Textbox(label="🗣️ Diarisation (PyAnnote)"),
+        gr.Textbox(label="🧠 Entités Nommées (spaCy)")
+    ],
+    title="🔎 Pipeline Audio Intelligent",
+    description="Transcription, Diarisation, et Extraction d'Entités Nommées sur un fichier audio français."
+)
+if __name__ == "__main__":
+    demo.launch()