Update app.py
Browse files
app.py
CHANGED
|
@@ -245,31 +245,46 @@ def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
|
|
| 245 |
progress(1.0, desc="Terminé!")
|
| 246 |
return "Transcription terminée!", transcription_result['text'], speaker_transcription
|
| 247 |
|
| 248 |
-
def format_to_markdown(
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
try:
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
except Exception as e:
|
| 271 |
print(f"Error formatting speaker transcription: {e}")
|
| 272 |
-
transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" +
|
| 273 |
|
| 274 |
formatted_output = f"""
|
| 275 |
# Transcription Formatée
|
|
@@ -282,6 +297,9 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
|
|
| 282 |
"""
|
| 283 |
return formatted_output
|
| 284 |
|
|
|
|
|
|
|
|
|
|
| 285 |
def _return_yt_html_embed(yt_url):
|
| 286 |
video_id = yt_url.split("?v=")[-1]
|
| 287 |
HTML_str = (
|
|
@@ -446,6 +464,7 @@ with demo:
|
|
| 446 |
audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
|
| 447 |
location = gr.Textbox(label="📍 Lieu de l'enregistrement")
|
| 448 |
speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
|
|
|
|
| 449 |
context = gr.Textbox(label="📝 Contexte de l'enregistrement")
|
| 450 |
|
| 451 |
format_button = gr.Button("✨ Générer la transcription formatée", elem_classes="button-secondary")
|
|
@@ -524,9 +543,9 @@ with demo:
|
|
| 524 |
)
|
| 525 |
|
| 526 |
format_button.click(
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
)
|
| 531 |
|
| 532 |
mic_transcribe_button.click(
|
|
|
|
| 245 |
progress(1.0, desc="Terminé!")
|
| 246 |
return "Transcription terminée!", transcription_result['text'], speaker_transcription
|
| 247 |
|
| 248 |
+
def format_to_markdown(transcription_result, audio_duration=None, location=None, speaker_age=None, context=None, custom_speaker_names=None):
|
| 249 |
+
if isinstance(transcription_result, dict):
|
| 250 |
+
metadata = transcription_result.get("metadata", {})
|
| 251 |
+
transcription = transcription_result.get("transcription", [])
|
| 252 |
+
else:
|
| 253 |
+
metadata = {}
|
| 254 |
+
transcription = transcription_result
|
| 255 |
+
|
| 256 |
+
speaker_count = metadata.get("speaker_count", "non spécifié")
|
| 257 |
+
speakers = metadata.get("speakers", [])
|
| 258 |
+
|
| 259 |
+
metadata_text = "\n".join([
|
| 260 |
+
f"- **Date de traitement** : '{datetime.now().strftime('%d/%m/%Y %H:%M')}'",
|
| 261 |
+
f"- **Durée de l'audio** : '{audio_duration if audio_duration else '[à remplir]'} secondes'",
|
| 262 |
+
f"- **Lieu** : '{location if location else '[non spécifié]'}'",
|
| 263 |
+
f"- **Âge de l'intervenant** : '{f'{speaker_age} ans' if speaker_age else '[non spécifié]'}'",
|
| 264 |
+
f"- **Contexte** : '{context if context else '[non spécifié]'}'",
|
| 265 |
+
f"- **Nombre d'interlocuteurs** : '{speaker_count}'",
|
| 266 |
+
f"- **Interlocuteurs bruts** : '{', '.join(speakers)}'"
|
| 267 |
+
])
|
| 268 |
+
|
| 269 |
try:
|
| 270 |
+
formatted_transcription = []
|
| 271 |
+
for segment in transcription:
|
| 272 |
+
speaker = segment['speaker']
|
| 273 |
+
text = segment['text']
|
| 274 |
+
start_time = format_time(segment['start'])
|
| 275 |
+
end_time = format_time(segment['end'])
|
| 276 |
+
|
| 277 |
+
if custom_speaker_names and speaker in custom_speaker_names:
|
| 278 |
+
display_speaker = custom_speaker_names[speaker]
|
| 279 |
+
else:
|
| 280 |
+
display_speaker = speaker
|
| 281 |
+
|
| 282 |
+
formatted_transcription.append(f"**[{start_time} - {end_time}] {display_speaker}**: {text}")
|
| 283 |
+
|
| 284 |
+
transcription_text = "\n\n".join(formatted_transcription)
|
| 285 |
except Exception as e:
|
| 286 |
print(f"Error formatting speaker transcription: {e}")
|
| 287 |
+
transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" + str(transcription)
|
| 288 |
|
| 289 |
formatted_output = f"""
|
| 290 |
# Transcription Formatée
|
|
|
|
| 297 |
"""
|
| 298 |
return formatted_output
|
| 299 |
|
| 300 |
+
def format_time(seconds):
    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

    Minutes are not wrapped at 60, so a duration of an hour or more yields
    a minute count above 59 (e.g. ``3725`` -> ``"62:05"``). Fractional
    seconds are truncated, not rounded.

    Args:
        seconds: Duration as an int or float, or ``None`` when the
            timestamp is missing.

    Returns:
        ``"MM:SS"`` for non-negative input, ``"-MM:SS"`` for negative
        input, and ``"--:--"`` when ``seconds`` is ``None``.
    """
    if seconds is None:
        # Render a visible placeholder instead of raising, so one missing
        # timestamp does not make the caller discard everything it formatted.
        return "--:--"
    # Truncate once and split with divmod: this avoids applying float
    # floor-division and modulo separately, and keeps negatives readable
    # (the naive form renders -5 s as "-1:55").
    total = int(abs(seconds))
    minutes, secs = divmod(total, 60)
    sign = "-" if seconds < 0 and total else ""
    return f"{sign}{minutes:02d}:{secs:02d}"
|
| 302 |
+
|
| 303 |
def _return_yt_html_embed(yt_url):
|
| 304 |
video_id = yt_url.split("?v=")[-1]
|
| 305 |
HTML_str = (
|
|
|
|
| 464 |
audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
|
| 465 |
location = gr.Textbox(label="📍 Lieu de l'enregistrement")
|
| 466 |
speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
|
| 467 |
+
custom_speaker_names = gr.TextArea(label="Noms personnalisés des locuteurs (format: SPEAKER_00: Nom1, SPEAKER_01: Nom2)")
|
| 468 |
context = gr.Textbox(label="📝 Contexte de l'enregistrement")
|
| 469 |
|
| 470 |
format_button = gr.Button("✨ Générer la transcription formatée", elem_classes="button-secondary")
|
|
|
|
| 543 |
)
|
| 544 |
|
| 545 |
format_button.click(
|
| 546 |
+
format_to_markdown,
|
| 547 |
+
inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context, custom_speaker_names],
|
| 548 |
+
outputs=formatted_output
|
| 549 |
)
|
| 550 |
|
| 551 |
mic_transcribe_button.click(
|