Update app.py
Browse files
app.py
CHANGED
|
@@ -36,20 +36,19 @@ MODEL_NAME = "openai/whisper-medium"
|
|
| 36 |
FILE_LIMIT_MB = 1000
|
| 37 |
YT_LENGTH_LIMIT_S = 3600
|
| 38 |
|
| 39 |
-
device =
|
| 40 |
|
| 41 |
pipe = pipeline(
|
| 42 |
task="automatic-speech-recognition",
|
| 43 |
model=MODEL_NAME,
|
|
|
|
| 44 |
device=device,
|
| 45 |
model_kwargs={"low_cpu_mem_usage": True},
|
| 46 |
-
return_timestamps="word"
|
| 47 |
)
|
| 48 |
|
| 49 |
|
| 50 |
|
| 51 |
|
| 52 |
-
|
| 53 |
def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.1, min_segment_duration=0.5):
|
| 54 |
word_segments = transcription_result['chunks']
|
| 55 |
diarization_segments = list(diarization.itertracks(yield_label=True))
|
|
@@ -124,11 +123,9 @@ def parse_simplified_diarization(simplified_text):
|
|
| 124 |
def process_transcription(*args):
|
| 125 |
generator = transcribe_and_diarize(*args)
|
| 126 |
for progress_message, raw_text, speaker_transcription in generator:
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
# Une fois la transcription terminée, effectuez la diarisation
|
| 130 |
simplified_diarization = simplify_diarization_output(speaker_transcription)
|
| 131 |
-
|
| 132 |
|
| 133 |
def process_yt_transcription(*args):
|
| 134 |
html_embed, raw_text, speaker_transcription = yt_transcribe(*args)
|
|
@@ -176,10 +173,6 @@ def display_progress(progress_state):
|
|
| 176 |
""")
|
| 177 |
|
| 178 |
@spaces.GPU(duration=120)
|
| 179 |
-
def stream_transcription(audio):
|
| 180 |
-
for result in pipe(audio, chunk_length_s=10, stride_length_s=(4, 2)):
|
| 181 |
-
yield result["text"]
|
| 182 |
-
|
| 183 |
def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
|
| 184 |
progress(0, desc="Initialisation...")
|
| 185 |
yield "Chargement du fichier...", None, None
|
|
@@ -187,12 +180,10 @@ def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
|
|
| 187 |
progress(0.2, desc="Préparation de l'audio...")
|
| 188 |
yield "Préparation de l'audio...", None, None
|
| 189 |
|
| 190 |
-
progress(0.
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
yield "Transcription en cours...", transcription, []
|
| 195 |
-
|
| 196 |
progress(0.6, desc=" C'est fait 😮💨 ! Je m'active à fusionner tout ça, un instant, J'y suis presque...")
|
| 197 |
if diarization_pipeline:
|
| 198 |
diarization = diarization_pipeline(file_path)
|
|
@@ -323,8 +314,7 @@ demo = gr.Blocks(
|
|
| 323 |
|
| 324 |
|
| 325 |
with demo:
|
| 326 |
-
gr.Markdown("
|
| 327 |
-
### ⚠️ Cette version est une maquette publique. Ne pas mettre de données sensibles, privées ou confidentielles.⚠️""")
|
| 328 |
gr.HTML(
|
| 329 |
"""
|
| 330 |
<div class="logo">
|
|
@@ -400,7 +390,7 @@ with demo:
|
|
| 400 |
progress_display = gr.Markdown(label="État de la progression")
|
| 401 |
|
| 402 |
with gr.Accordion("Résultats 📊", open=True):
|
| 403 |
-
|
| 404 |
speaker_output = gr.Textbox(label="👥 Diarisation (format simplifié)", info="Identification des locuteurs. Format : 'SPEAKER_XX: texte'")
|
| 405 |
with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
|
| 406 |
audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
|
|
@@ -484,10 +474,9 @@ with demo:
|
|
| 484 |
|
| 485 |
# Connexions des boutons aux fonctions appropriées
|
| 486 |
transcribe_button.click(
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
show_progress=True,
|
| 491 |
)
|
| 492 |
|
| 493 |
format_button.click(
|
|
@@ -522,4 +511,4 @@ with demo:
|
|
| 522 |
|
| 523 |
|
| 524 |
if __name__ == "__main__":
|
| 525 |
-
demo.queue().launch()
|
|
|
|
| 36 |
FILE_LIMIT_MB = 1000
|
| 37 |
YT_LENGTH_LIMIT_S = 3600
|
| 38 |
|
| 39 |
+
device = 0 if torch.cuda.is_available() else "cpu"
|
| 40 |
|
| 41 |
pipe = pipeline(
|
| 42 |
task="automatic-speech-recognition",
|
| 43 |
model=MODEL_NAME,
|
| 44 |
+
#chunk_length_s=30,
|
| 45 |
device=device,
|
| 46 |
model_kwargs={"low_cpu_mem_usage": True},
|
|
|
|
| 47 |
)
|
| 48 |
|
| 49 |
|
| 50 |
|
| 51 |
|
|
|
|
| 52 |
def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.1, min_segment_duration=0.5):
|
| 53 |
word_segments = transcription_result['chunks']
|
| 54 |
diarization_segments = list(diarization.itertracks(yield_label=True))
|
|
|
|
| 123 |
def process_transcription(*args):
|
| 124 |
generator = transcribe_and_diarize(*args)
|
| 125 |
for progress_message, raw_text, speaker_transcription in generator:
|
| 126 |
+
pass # Consommer le générateur jusqu'à la fin
|
|
|
|
|
|
|
| 127 |
simplified_diarization = simplify_diarization_output(speaker_transcription)
|
| 128 |
+
return progress_message, raw_text, simplified_diarization
|
| 129 |
|
| 130 |
def process_yt_transcription(*args):
|
| 131 |
html_embed, raw_text, speaker_transcription = yt_transcribe(*args)
|
|
|
|
| 173 |
""")
|
| 174 |
|
| 175 |
@spaces.GPU(duration=120)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
|
| 177 |
progress(0, desc="Initialisation...")
|
| 178 |
yield "Chargement du fichier...", None, None
|
|
|
|
| 180 |
progress(0.2, desc="Préparation de l'audio...")
|
| 181 |
yield "Préparation de l'audio...", None, None
|
| 182 |
|
| 183 |
+
progress(0.4, desc="Laissez moi quelques minutes pour déchiffrer les voix et rédiger l'audio 🤓 ✍️ ...")
|
| 184 |
+
transcription_result = pipe(file_path, generate_kwargs={"task": task, "language": "fr"}, return_timestamps="word")
|
| 185 |
+
yield "Transcription en cours...", None, None
|
| 186 |
+
|
|
|
|
|
|
|
| 187 |
progress(0.6, desc=" C'est fait 😮💨 ! Je m'active à fusionner tout ça, un instant, J'y suis presque...")
|
| 188 |
if diarization_pipeline:
|
| 189 |
diarization = diarization_pipeline(file_path)
|
|
|
|
| 314 |
|
| 315 |
|
| 316 |
with demo:
|
| 317 |
+
gr.Markdown("# 🎙️ **Scribe** : L'assistant de Transcription Audio Intelligent 📝 ⚠️ Cette version est une maquette publique. Ne pas mettre de données sensibles, privées ou confidentielles.")
|
|
|
|
| 318 |
gr.HTML(
|
| 319 |
"""
|
| 320 |
<div class="logo">
|
|
|
|
| 390 |
progress_display = gr.Markdown(label="État de la progression")
|
| 391 |
|
| 392 |
with gr.Accordion("Résultats 📊", open=True):
|
| 393 |
+
raw_output = gr.Textbox(label="📝 Transcription brute", info="Texte généré par le modèle. Modifiable si nécessaire.")
|
| 394 |
speaker_output = gr.Textbox(label="👥 Diarisation (format simplifié)", info="Identification des locuteurs. Format : 'SPEAKER_XX: texte'")
|
| 395 |
with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
|
| 396 |
audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
|
|
|
|
| 474 |
|
| 475 |
# Connexions des boutons aux fonctions appropriées
|
| 476 |
transcribe_button.click(
|
| 477 |
+
process_transcription,
|
| 478 |
+
inputs=[audio_input, task_input],
|
| 479 |
+
outputs=[progress_display, raw_output, speaker_output]
|
|
|
|
| 480 |
)
|
| 481 |
|
| 482 |
format_button.click(
|
|
|
|
| 511 |
|
| 512 |
|
| 513 |
if __name__ == "__main__":
|
| 514 |
+
demo.queue().launch()
|