Woziii committed on
Commit
ae7bb8d
·
verified ·
1 Parent(s): 71e2e85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -15
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  import shutil
4
  import zipfile
@@ -6,7 +5,7 @@ import torch
6
  from pathlib import Path
7
 
8
  import gradio as gr
9
- from pydub import AudioSegment # Correction de l'import erroné de 'pubdub'
10
  from transformers import pipeline
11
 
12
  # -------------------------------------------------
@@ -53,21 +52,35 @@ def validate_segments(audio_path, table_data, metadata_state, word_timestamps):
53
  updated_metadata = []
54
 
55
  for i, row in enumerate(table_data):
56
- text, start_time, end_time, segment_id = row
57
- if not text or start_time is None or end_time is None:
58
- continue
59
-
60
- segment_id = segment_id or f"seg_{i+1:02d}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  start_ms, end_ms = int(float(start_time) * 1000), int(float(end_time) * 1000)
62
  if start_ms < 0 or end_ms <= start_ms:
63
  continue
64
-
65
  segment_filename = f"{Path(audio_path).stem}_{segment_id}.wav"
66
  segment_path = os.path.join(TEMP_DIR, segment_filename)
67
-
68
  extract = original_audio[start_ms:end_ms]
69
  extract.export(segment_path, format="wav")
70
-
71
  segment_paths.append(segment_path)
72
  updated_metadata.append({
73
  "audio_file": segment_filename,
@@ -112,10 +125,10 @@ with gr.Blocks() as demo:
112
  audio_input = gr.Audio(type="filepath", label="Fichier audio")
113
  raw_transcription = gr.Textbox(label="Transcription", interactive=False)
114
  table = gr.Dataframe(
115
- headers=["Texte", "Début (s)", "Fin (s)", "ID"],
116
- datatype=["str", "number", "number", "str"],
117
- row_count=(1, "dynamic"), # Gestion dynamique confirmée
118
- col_count=4
119
  )
120
  validate_button = gr.Button("Valider")
121
  audio_players = [gr.Audio(label=f"Extrait {i+1}", interactive=False) for i in range(20)]
@@ -129,4 +142,3 @@ with gr.Blocks() as demo:
129
  generate_button.click(generate_zip, inputs=metadata_state, outputs=zip_file)
130
 
131
  demo.queue().launch()
132
-
 
 
1
  import os
2
  import shutil
3
  import zipfile
 
5
  from pathlib import Path
6
 
7
  import gradio as gr
8
+ from pydub import AudioSegment
9
  from transformers import pipeline
10
 
11
  # -------------------------------------------------
 
52
  updated_metadata = []
53
 
54
  for i, row in enumerate(table_data):
55
+ if len(row) < 1:
56
+ continue # Ignorer les lignes vides
57
+
58
+ text = row[0] # L'utilisateur n'entre que le texte
59
+ segment_id = f"seg_{i+1:02d}"
60
+
61
+ # Trouver les timestamps correspondant au texte
62
+ matching_timestamps = [
63
+ (start, end) for word, (start, end) in word_timestamps if word in text
64
+ ]
65
+
66
+ if matching_timestamps:
67
+ start_time, end_time = matching_timestamps[0] # Premier match trouvé
68
+ else:
69
+ start_time, end_time = None, None # Impossible de déterminer
70
+
71
+ if start_time is None or end_time is None:
72
+ continue # Ignorer si aucun timestamp trouvé
73
+
74
  start_ms, end_ms = int(float(start_time) * 1000), int(float(end_time) * 1000)
75
  if start_ms < 0 or end_ms <= start_ms:
76
  continue
77
+
78
  segment_filename = f"{Path(audio_path).stem}_{segment_id}.wav"
79
  segment_path = os.path.join(TEMP_DIR, segment_filename)
80
+
81
  extract = original_audio[start_ms:end_ms]
82
  extract.export(segment_path, format="wav")
83
+
84
  segment_paths.append(segment_path)
85
  updated_metadata.append({
86
  "audio_file": segment_filename,
 
125
  audio_input = gr.Audio(type="filepath", label="Fichier audio")
126
  raw_transcription = gr.Textbox(label="Transcription", interactive=False)
127
  table = gr.Dataframe(
128
+ headers=["Texte"],
129
+ datatype=["str"],
130
+ row_count=(1, "dynamic"), # Gestion dynamique
131
+ col_count=1
132
  )
133
  validate_button = gr.Button("Valider")
134
  audio_players = [gr.Audio(label=f"Extrait {i+1}", interactive=False) for i in range(20)]
 
142
  generate_button.click(generate_zip, inputs=metadata_state, outputs=zip_file)
143
 
144
  demo.queue().launch()