danzapp70 commited on
Commit
3c11817
·
verified ·
1 Parent(s): 0d6f640

Deploy version v1.2.0

Browse files
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  output/romano_subbed_with_romano_faster_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
37
  output/romano_subbed_with_romano_openai_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
38
  output/romano_audio.mp3 filter=lfs diff=lfs merge=lfs -text
 
 
36
  output/romano_subbed_with_romano_faster_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
37
  output/romano_subbed_with_romano_openai_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
38
  output/romano_audio.mp3 filter=lfs diff=lfs merge=lfs -text
39
+ output/Audio[[:space:]]2_[[:space:]]Take[[:space:]]2[[:space:]]mp3cut.net_20250623192512.wav filter=lfs diff=lfs merge=lfs -text
Documentation.md CHANGED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Direzione per lo Sviluppo la Gestione e la Sicurezza dei Sistemi Informativi e l'Innovazione Digitale
2
+
3
+ ## Transcribe it - Webapp Gradio
4
+
5
+ ### Descrizione
6
+ Questa applicazione web consente di estrarre, modificare e scaricare sottotitoli da file video o audio, con una gestione avanzata dello storico, editor audio, player video custom e robusta gestione degli errori. L'interfaccia è realizzata con Gradio e ottimizzata per la massima semplicità d'uso.
7
+
8
+ ### Funzionalità principali
9
+ - **Upload di file video o audio**: supporto a formati comuni (mp4, mp3, wav, ecc.).
10
+ - **Estrazione automatica dell'audio dai video**.
11
+ - **Editor audio integrato**: possibilità di modificare la traccia audio prima della generazione sottotitoli.
12
+ - **Generazione sottotitoli**:
13
+ - Supporto a due motori: Faster Whisper (locale) e OpenAI Whisper (cloud, con gestione API Key).
14
+ - Suddivisione automatica dei file audio troppo grandi (>25MB) in segmenti MP3, con trascrizione e ricostruzione automatica.
15
+ - Salvataggio sia del file SRT (con tempi) che del file TXT (solo testo puro).
16
+ - **Storico processi**:
17
+ - Ogni generazione aggiunge due righe: una per il file SRT, una per il TXT.
18
+ - Colonna "Tipologia SRT" per distinguere tra SRT con tempi e testo puro.
19
+ - Cronologia consultabile e azioni rapide (modifica, download, unione video, elimina).
20
+ - **Player video custom**: anteprima del video originale e del video sottotitolato.
21
+ - **Player audio**: sempre visibile dopo upload/estrazione e durante la generazione sottotitoli.
22
+ - **Editor SRT integrato**: modifica e salvataggio diretto dei sottotitoli.
23
+ - **Unione sottotitoli al video**: generazione automatica del video finale con sottotitoli hardcoded.
24
+ - **Download**: scarica file SRT, TXT e audio modificato.
25
+ - **Loader visivi**: spinner e messaggi di caricamento durante tutte le operazioni lunghe (generazione sottotitoli, merge video, caricamento player).
26
+ - **Gestione robusta degli errori**: feedback chiari e nessun crash anche in caso di input non valido o errori API.
27
+ - **Compatibilità multipiattaforma**: funziona su Windows, macOS e Linux.
28
+
29
+ ### Requisiti
30
+ - Python 3.8+
31
+ - Dipendenze principali: gradio, moviepy, pydub, pandas, faster-whisper, openai
32
+
33
+ ### Avvio rapido
34
+ 1. Installa le dipendenze: `pip install -r requirements.txt`
35
+ 2. Avvia l'app: `python app.py`
36
+ 3. Accedi all'interfaccia web tramite il link fornito in console.
37
+
38
+ ### Note di sicurezza
39
+ - La chiave API OpenAI non viene salvata e viene usata solo per la sessione corrente.
40
+ - I file temporanei vengono eliminati automaticamente al termine della sessione.
41
+
42
+ ---
43
+
app.py CHANGED
@@ -3,10 +3,10 @@ import os
3
  import json
4
  import logging
5
  from moviepy.editor import VideoFileClip, AudioFileClip
 
6
 
7
  from src.subtitle_extractor import transcribe_audio, save_srt
8
  import time
9
- import shutil
10
  import subprocess
11
  from datetime import datetime
12
  import pandas as pd
@@ -37,30 +37,20 @@ def format_timestamp(seconds):
37
  def extract_audio_only(video_path, progress=gr.Progress(track_tqdm=True)):
38
  if not video_path:
39
  gr.Warning("Carica prima un video per estrarre l'audio.")
40
- # Restituisce 3 valori anche in caso di errore
41
- return None, None, gr.update(visible=False)
42
  try:
43
  gr.Info("Estrazione audio in corso...")
44
  video = VideoFileClip(video_path)
45
-
46
- output_dir = os.path.join(os.getcwd(), "output") # Salva ancora nella cartella temporanea definita all'inizio
47
  os.makedirs(output_dir, exist_ok=True)
48
-
49
  base_name = os.path.splitext(os.path.basename(video_path))[0]
50
  audio_filename = os.path.join(output_dir, f"{base_name}_audio.mp3")
51
-
52
  video.audio.write_audiofile(audio_filename, logger=None)
53
-
54
  gr.Info("Estrazione audio completata.")
55
-
56
- # --- LA RIGA CORRETTA È QUESTA ---
57
- # Ora restituisce 3 valori: il player, lo stato per l'undo, e la visibilità del gruppo
58
- return gr.update(value=audio_filename, visible=True), audio_filename, gr.update(visible=True)
59
-
60
  except Exception as e:
61
  gr.Error(f"Errore durante l'estrazione dell'audio: {e}")
62
- # Restituisce 3 valori anche in caso di eccezione
63
- return None, None, gr.update(visible=False)
64
  def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True)):
65
  if not video_path or not srt_path:
66
  gr.Warning("Percorso video o sottotitoli mancante!"); return None, None
@@ -84,42 +74,61 @@ def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, c
84
  global stop_requested
85
  if stop_requested:
86
  logging.warning("Transcription stopped by user.")
87
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
 
 
 
 
 
88
 
89
  audio_source = None
 
 
90
  if edited_audio_path and os.path.exists(edited_audio_path):
91
  logging.info("Using edited audio for transcription.")
92
  audio_source = edited_audio_path
 
 
93
  elif video_path and os.path.exists(video_path):
94
  logging.info("Extracting audio from original video for transcription...")
95
  try:
96
  video = VideoFileClip(video_path)
97
- audio_source = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
98
- video.audio.write_audiofile(audio_source, logger=None)
99
- logging.info(f"Audio extracted to: {audio_source}")
 
 
 
 
 
 
 
 
 
100
  except Exception as e:
101
  logging.error(f"Error extracting audio: {e}")
102
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
103
  else:
104
  logging.error("No valid video or audio source provided.")
105
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
106
 
107
  try:
108
  if library == "OpenAI Whisper":
109
- if not api_key:
110
- logging.error("Missing OpenAI API Key.")
111
- gr.Error("API Key OpenAI mancante.")
112
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
113
  logging.info("Using OpenAI Whisper for transcription.")
114
- srt_content = transcribe_audio(
115
- audio_source,
116
- library="OpenAI Whisper",
117
- api_key=api_key,
118
- words_per_sub=int(words_per_sub),
119
- )
 
 
 
 
 
120
  else:
121
  logging.info("Using Faster Whisper for transcription.")
122
- srt_content = transcribe_audio(
123
  audio_source,
124
  library="faster_whisper",
125
  api_key=None,
@@ -129,37 +138,51 @@ def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, c
129
  except Exception as e:
130
  logging.error(f"Error during transcription: {e}")
131
  gr.Error(f"Errore trascrizione: {e}")
132
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
133
 
134
  base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
135
- srt_filename = os.path.join(TEMP_DIR, f"{base_name}.srt")
 
 
136
  try:
137
  save_srt(srt_content, srt_filename)
138
- logging.info(f"SRT file saved successfully at: {srt_filename}")
 
 
139
  except Exception as e:
140
- logging.error(f"Error saving SRT file: {e}")
141
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
142
 
143
  if audio_source.startswith(TEMP_DIR) and os.path.basename(audio_source) == "temp_transcribe_audio.wav":
144
  os.remove(audio_source)
145
  logging.info("Temporary audio file removed.")
146
 
147
  elapsed_time = time.time() - start_time
148
- new_entry = {
149
  "File SRT": os.path.basename(srt_filename),
150
  "Libreria": library,
151
- "Tempo Impiegato (s)": f"{elapsed_time:.2f}",
152
  "Percorso Completo": srt_filename,
153
  "Video Unito": None,
154
  "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
155
  "Orario Unione": "",
156
  }
157
- logging.debug(f"Adding new entry to history: {new_entry}")
158
- updated_history = [e for e in current_history if e["File SRT"] != os.path.basename(srt_filename)]
159
- updated_history.append(new_entry)
 
 
 
 
 
 
 
 
 
160
  logging.debug(f"Updated history: {updated_history}")
161
 
162
- return updated_history, gr.update(interactive=False), update_dataframe(updated_history)
 
163
 
164
  # ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
165
 
@@ -195,12 +218,28 @@ try:
195
  except FileNotFoundError: VERSION = "1.0.0"
196
  BADGE = f"<span style='background:#1976d2;color:white;padding:2px 8px;border-radius:8px;font-size:0.9em;margin-left:8px;'>v{VERSION}</span>"
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<script>{js_loader_script}</script>") as demo:
199
  srt_history_state = gr.State([])
200
  selected_srt_path_state = gr.State(None)
201
  original_audio_path_state = gr.State()
202
 
203
- gr.Markdown(f"<h1>Estrattore Sottotitoli {BADGE}</h1>")
204
 
205
  gr.Markdown("### 1. Carica un file")
206
  video_input = gr.File(label="Carica un file video o audio", file_types=["video", "audio"])
@@ -215,6 +254,8 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
215
  api_key_input = gr.Textbox(label="API Key OpenAI", type="password", placeholder="sk-...")
216
  cost_estimate = gr.Markdown()
217
  words_slider = gr.Slider(minimum=6, maximum=15, value=7, step=1, label="Parole per sottotitolo")
 
 
218
  submit_btn = gr.Button("▶️ Genera Sottotitoli", variant="primary")
219
  stop_btn = gr.Button("⏹️ Arresta", variant="stop", visible=False)
220
  loader = gr.HTML("""<div id="loader-container" style='text-align:center; display:none; margin-top:1rem;'><div style='display:inline-block; position:relative; width:50px; height:50px;'><svg width='50' height='50' viewBox='0 0 50 50'><circle cx='25' cy='25' r='20' fill='none' stroke='#1976d2' stroke-width='5' stroke-linecap='round' stroke-dasharray='100' stroke-dashoffset='60'><animateTransform attributeName='transform' type='rotate' from='0 25 25' to='360 25 25' dur='1.5s' repeatCount='indefinite'/></circle></svg><div id='timer' style='position:absolute; top:50%; left:50%; transform:translate(-50%,-50%); font-size:0.9em; color:#1976d2;'>0s</div></div></div>""")
@@ -222,18 +263,28 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
222
  with gr.Column(scale=2):
223
  gr.Markdown("### 3. Anteprima ed Editor")
224
  video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
225
- with gr.Group(visible=False) as audio_editor_group:
226
- audio_output = gr.Audio(label="Editor Traccia Audio", editable=True, type="filepath")
227
- undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
228
- final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False)
 
 
 
 
 
 
 
 
229
 
230
  with gr.Column():
231
  gr.Markdown("--- \n### 4. Cronologia e Azioni sui Sottotitoli\n*Seleziona una riga per attivare le azioni.*")
232
- history_df = gr.Dataframe(headers=["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"], interactive=True)
233
  with gr.Row(visible=False) as action_buttons:
234
  edit_btn = gr.Button("📝 Modifica SRT")
235
  merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
236
  delete_btn = gr.Button("🗑️ Elimina", variant="stop")
 
 
237
  with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
238
  srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
239
  save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
@@ -242,11 +293,34 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
242
 
243
  # MODIFICATA: Logica semplificata e robusta
244
  def show_main_controls(file_obj):
 
 
245
  if file_obj:
246
- # Se un file viene caricato, mostra il pannello principale e l'anteprima
247
- return gr.update(visible=True, value=file_obj.name), gr.update(visible=True), gr.update(interactive=True)
248
- # Se il file viene cancellato, nascondi tutto
249
- return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
  def on_select_srt(history_data, evt: gr.SelectData):
252
  if evt.index is None:
@@ -301,12 +375,16 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
301
 
302
  # --- CABLAGGIO EVENTI ---
303
 
304
- video_input.upload(fn=show_main_controls, inputs=video_input, outputs=[video_preview, main_panel, submit_btn])
 
 
 
 
305
 
306
  extract_audio_btn.click(
307
  fn=extract_audio_only,
308
  inputs=[video_input],
309
- outputs=[audio_output, original_audio_path_state, audio_editor_group],
310
  )
311
 
312
  undo_audio_btn.click(
@@ -321,8 +399,23 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
321
  outputs=openai_options,
322
  )
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  submit_btn.click(
325
- fn=transcribe,
326
  inputs=[
327
  video_input,
328
  audio_output,
@@ -331,7 +424,9 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
331
  words_slider,
332
  srt_history_state,
333
  ],
334
- outputs=[srt_history_state, submit_btn, history_df],
 
 
335
  )
336
 
337
  # Aggiorna il cablaggio eventi per history_df
@@ -359,11 +454,58 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
359
  outputs=[srt_editor_accordion] # Rende visibile l'accordion
360
  )
361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  # Aggiorna il cablaggio eventi per merge_btn
363
  merge_btn.click(
364
- fn=merge_subtitles,
 
 
 
365
  inputs=[video_input, selected_srt_path_state],
366
- outputs=[final_video] # Aggiorna solo il contenuto del video finale
 
 
 
 
 
 
 
 
 
 
 
 
367
  )
368
 
369
  # Rende visibile il video finale quando viene cliccato il pulsante
@@ -373,6 +515,13 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
373
  outputs=[final_video] # Rende visibile il componente del video finale
374
  )
375
 
 
 
 
 
 
 
 
376
  # Aggiorna il cablaggio eventi per delete_btn
377
  delete_btn.click(
378
  fn=delete_selected,
@@ -380,6 +529,36 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
380
  outputs=[srt_history_state, action_buttons] # Update history and hide action buttons
381
  )
382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
  if __name__ == "__main__":
385
  demo.queue().launch() # Rimosso `share=True` per eseguire l'app localmente
 
3
  import json
4
  import logging
5
  from moviepy.editor import VideoFileClip, AudioFileClip
6
+ import shutil
7
 
8
  from src.subtitle_extractor import transcribe_audio, save_srt
9
  import time
 
10
  import subprocess
11
  from datetime import datetime
12
  import pandas as pd
 
37
  def extract_audio_only(video_path, progress=gr.Progress(track_tqdm=True)):
38
  if not video_path:
39
  gr.Warning("Carica prima un video per estrarre l'audio.")
40
+ return gr.update(visible=False, value=None), None, gr.update(visible=False, value=None)
 
41
  try:
42
  gr.Info("Estrazione audio in corso...")
43
  video = VideoFileClip(video_path)
44
+ output_dir = os.path.join(os.getcwd(), "output")
 
45
  os.makedirs(output_dir, exist_ok=True)
 
46
  base_name = os.path.splitext(os.path.basename(video_path))[0]
47
  audio_filename = os.path.join(output_dir, f"{base_name}_audio.mp3")
 
48
  video.audio.write_audiofile(audio_filename, logger=None)
 
49
  gr.Info("Estrazione audio completata.")
50
+ return gr.update(value=audio_filename, visible=True), audio_filename, gr.update(visible=True, value=audio_filename)
 
 
 
 
51
  except Exception as e:
52
  gr.Error(f"Errore durante l'estrazione dell'audio: {e}")
53
+ return gr.update(visible=False, value=None), None, gr.update(visible=False, value=None)
 
54
  def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True)):
55
  if not video_path or not srt_path:
56
  gr.Warning("Percorso video o sottotitoli mancante!"); return None, None
 
74
  global stop_requested
75
  if stop_requested:
76
  logging.warning("Transcription stopped by user.")
77
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
78
+
79
+ # --- VALIDAZIONE API KEY ---
80
+ if library == "OpenAI Whisper" and (not api_key or not api_key.strip()):
81
+ gr.Error("Devi inserire la API Key OpenAI per usare questa modalità.")
82
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
83
 
84
  audio_source = None
85
+ audio_editor_update = None
86
+ original_audio_update = None
87
  if edited_audio_path and os.path.exists(edited_audio_path):
88
  logging.info("Using edited audio for transcription.")
89
  audio_source = edited_audio_path
90
+ audio_editor_update = gr.update(value=edited_audio_path, visible=True)
91
+ original_audio_update = edited_audio_path
92
  elif video_path and os.path.exists(video_path):
93
  logging.info("Extracting audio from original video for transcription...")
94
  try:
95
  video = VideoFileClip(video_path)
96
+ temp_audio_path = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
97
+ video.audio.write_audiofile(temp_audio_path, logger=None)
98
+ output_dir = os.path.join(os.getcwd(), "output")
99
+ os.makedirs(output_dir, exist_ok=True)
100
+ base_name = os.path.splitext(os.path.basename(video_path))[0]
101
+ audio_filename = os.path.join(output_dir, f"{base_name}_audio_gradio.wav")
102
+ import shutil
103
+ shutil.copy(temp_audio_path, audio_filename)
104
+ rel_audio_path = os.path.relpath(audio_filename, os.getcwd())
105
+ audio_source = audio_filename
106
+ audio_editor_update = gr.update(value=rel_audio_path, visible=True)
107
+ original_audio_update = rel_audio_path
108
  except Exception as e:
109
  logging.error(f"Error extracting audio: {e}")
110
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), gr.update(visible=False, value=None), None, None
111
  else:
112
  logging.error("No valid video or audio source provided.")
113
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), gr.update(visible=False, value=None), None, None
114
 
115
  try:
116
  if library == "OpenAI Whisper":
 
 
 
 
117
  logging.info("Using OpenAI Whisper for transcription.")
118
+ try:
119
+ srt_content, plain_text = transcribe_audio(
120
+ audio_source,
121
+ library="OpenAI Whisper",
122
+ api_key=api_key,
123
+ words_per_sub=int(words_per_sub),
124
+ )
125
+ except Exception as e:
126
+ logging.error(f"Errore chiamata OpenAI Whisper: {e}")
127
+ gr.Error(f"Errore OpenAI Whisper: {e}")
128
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
129
  else:
130
  logging.info("Using Faster Whisper for transcription.")
131
+ srt_content, plain_text = transcribe_audio(
132
  audio_source,
133
  library="faster_whisper",
134
  api_key=None,
 
138
  except Exception as e:
139
  logging.error(f"Error during transcription: {e}")
140
  gr.Error(f"Errore trascrizione: {e}")
141
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
142
 
143
  base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
144
+ engine_suffix = "_openai" if library == "OpenAI Whisper" else "_fasterwhisper"
145
+ srt_filename = os.path.join(TEMP_DIR, f"{base_name}{engine_suffix}.srt")
146
+ txt_filename = os.path.join(TEMP_DIR, f"{base_name}{engine_suffix}.txt")
147
  try:
148
  save_srt(srt_content, srt_filename)
149
+ from src.subtitle_extractor import save_txt
150
+ save_txt(plain_text, txt_filename)
151
+ logging.info(f"SRT file saved at: {srt_filename}, TXT file saved at: {txt_filename}")
152
  except Exception as e:
153
+ logging.error(f"Error saving SRT/TXT file: {e}")
154
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
155
 
156
  if audio_source.startswith(TEMP_DIR) and os.path.basename(audio_source) == "temp_transcribe_audio.wav":
157
  os.remove(audio_source)
158
  logging.info("Temporary audio file removed.")
159
 
160
  elapsed_time = time.time() - start_time
161
+ new_entry_srt = {
162
  "File SRT": os.path.basename(srt_filename),
163
  "Libreria": library,
164
+ "Tipologia SRT": "SRT con tempi",
165
  "Percorso Completo": srt_filename,
166
  "Video Unito": None,
167
  "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
168
  "Orario Unione": "",
169
  }
170
+ new_entry_txt = {
171
+ "File SRT": os.path.basename(txt_filename),
172
+ "Libreria": library,
173
+ "Tipologia SRT": "Testo puro",
174
+ "Percorso Completo": txt_filename,
175
+ "Video Unito": None,
176
+ "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
177
+ "Orario Unione": "",
178
+ }
179
+ updated_history = current_history.copy()
180
+ updated_history.append(new_entry_srt)
181
+ updated_history.append(new_entry_txt)
182
  logging.debug(f"Updated history: {updated_history}")
183
 
184
+ # Riabilita sempre il pulsante dopo la generazione
185
+ return updated_history, gr.update(interactive=True), update_dataframe(updated_history), audio_editor_update, original_audio_update
186
 
187
  # ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
188
 
 
218
  except FileNotFoundError: VERSION = "1.0.0"
219
  BADGE = f"<span style='background:#1976d2;color:white;padding:2px 8px;border-radius:8px;font-size:0.9em;margin-left:8px;'>v{VERSION}</span>"
220
 
221
+ # Loader HTML come template
222
+ LOADER_HTML_ON = """
223
+ <div id='subtitle-loader' style='display:block;text-align:center;margin-top:0.5em;'>
224
+ <span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
225
+ <span style='color:#1976d2;margin-left:8px;'>Generazione sottotitoli in corso...</span>
226
+ </div>
227
+ <style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
228
+ """
229
+ LOADER_HTML_OFF = """
230
+ <div id='subtitle-loader' style='display:none;text-align:center;margin-top:0.5em;'>
231
+ <span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
232
+ <span style='color:#1976d2;margin-left:8px;'>Generazione sottotitoli in corso...</span>
233
+ </div>
234
+ <style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
235
+ """
236
+
237
  with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<script>{js_loader_script}</script>") as demo:
238
  srt_history_state = gr.State([])
239
  selected_srt_path_state = gr.State(None)
240
  original_audio_path_state = gr.State()
241
 
242
+ gr.Markdown(f"<h1>Transcribe Speech {BADGE}</h1>")
243
 
244
  gr.Markdown("### 1. Carica un file")
245
  video_input = gr.File(label="Carica un file video o audio", file_types=["video", "audio"])
 
254
  api_key_input = gr.Textbox(label="API Key OpenAI", type="password", placeholder="sk-...")
255
  cost_estimate = gr.Markdown()
256
  words_slider = gr.Slider(minimum=6, maximum=15, value=7, step=1, label="Parole per sottotitolo")
257
+ # --- LOADER HTML SEMPRE PRESENTE SOPRA IL PULSANTE ---
258
+ loader_html = gr.HTML(LOADER_HTML_OFF)
259
  submit_btn = gr.Button("▶️ Genera Sottotitoli", variant="primary")
260
  stop_btn = gr.Button("⏹️ Arresta", variant="stop", visible=False)
261
  loader = gr.HTML("""<div id="loader-container" style='text-align:center; display:none; margin-top:1rem;'><div style='display:inline-block; position:relative; width:50px; height:50px;'><svg width='50' height='50' viewBox='0 0 50 50'><circle cx='25' cy='25' r='20' fill='none' stroke='#1976d2' stroke-width='5' stroke-linecap='round' stroke-dasharray='100' stroke-dashoffset='60'><animateTransform attributeName='transform' type='rotate' from='0 25 25' to='360 25 25' dur='1.5s' repeatCount='indefinite'/></circle></svg><div id='timer' style='position:absolute; top:50%; left:50%; transform:translate(-50%,-50%); font-size:0.9em; color:#1976d2;'>0s</div></div></div>""")
 
263
  with gr.Column(scale=2):
264
  gr.Markdown("### 3. Anteprima ed Editor")
265
  video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
266
+ audio_output = gr.Audio(label="Editor Traccia Audio", editable=True, type="filepath", visible=False)
267
+ download_audio_btn = gr.Button("⬇️ Download Audio", variant="primary")
268
+ audio_download_file = gr.File(label="Scarica Audio", visible=False)
269
+ undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
270
+ final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False, visible=False)
271
+ final_video_loader = gr.HTML("""
272
+ <div id='final-video-loader' style='display:none;text-align:center;margin-top:0.5em;'>
273
+ <span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
274
+ <span style='color:#1976d2;margin-left:8px;'>Caricamento video sottotitolato...</span>
275
+ </div>
276
+ <style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
277
+ """)
278
 
279
  with gr.Column():
280
  gr.Markdown("--- \n### 4. Cronologia e Azioni sui Sottotitoli\n*Seleziona una riga per attivare le azioni.*")
281
+ history_df = gr.Dataframe(headers=["File SRT", "Libreria", "Tipologia SRT", "Orario Generazione", "Video Unito", "Orario Unione"], interactive=True)
282
  with gr.Row(visible=False) as action_buttons:
283
  edit_btn = gr.Button("📝 Modifica SRT")
284
  merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
285
  delete_btn = gr.Button("🗑️ Elimina", variant="stop")
286
+ download_btn = gr.Button("⬇️ Download SRT", variant="primary")
287
+ srt_download_file = gr.File(label="Scarica SRT", visible=False)
288
  with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
289
  srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
290
  save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
 
293
 
294
  # MODIFICATA: Logica semplificata e robusta
295
  def show_main_controls(file_obj):
296
+ import mimetypes
297
+ import shutil
298
  if file_obj:
299
+ file_path = file_obj.name
300
+ mime, _ = mimetypes.guess_type(file_path)
301
+ is_video = mime and mime.startswith("video")
302
+ is_audio = mime and mime.startswith("audio")
303
+ video_preview_update = gr.update(visible=is_video, value=file_path if is_video else None)
304
+ submit_btn_update = gr.update(interactive=True)
305
+ main_panel_update = gr.update(visible=True)
306
+ if is_audio:
307
+ output_dir = os.path.join(os.getcwd(), "output")
308
+ os.makedirs(output_dir, exist_ok=True)
309
+ ext = os.path.splitext(file_path)[1].lower()
310
+ if ext not in [".wav", ".mp3", ".flac", ".ogg"]:
311
+ gr.Error("Formato audio non supportato. Usa WAV, MP3, FLAC o OGG.")
312
+ return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False), gr.update(visible=False, value=None)
313
+ base_name = os.path.splitext(os.path.basename(file_path))[0]
314
+ timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
315
+ new_audio_name = f"{base_name}_{timestamp}{ext}"
316
+ new_audio_path = os.path.join(output_dir, new_audio_name)
317
+ shutil.copy(file_path, new_audio_path)
318
+ rel_audio_path = os.path.relpath(new_audio_path, os.getcwd())
319
+ video_preview_update = gr.update(visible=False, value=None)
320
+ audio_output_update = gr.update(value=rel_audio_path, visible=True)
321
+ return video_preview_update, main_panel_update, submit_btn_update, audio_output_update
322
+ return video_preview_update, main_panel_update, submit_btn_update, gr.update(visible=False, value=None)
323
+ return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False), gr.update(visible=False, value=None)
324
 
325
  def on_select_srt(history_data, evt: gr.SelectData):
326
  if evt.index is None:
 
375
 
376
  # --- CABLAGGIO EVENTI ---
377
 
378
+ video_input.upload(
379
+ fn=show_main_controls,
380
+ inputs=video_input,
381
+ outputs=[video_preview, main_panel, submit_btn, audio_output],
382
+ )
383
 
384
  extract_audio_btn.click(
385
  fn=extract_audio_only,
386
  inputs=[video_input],
387
+ outputs=[audio_output, original_audio_path_state],
388
  )
389
 
390
  undo_audio_btn.click(
 
399
  outputs=openai_options,
400
  )
401
 
402
+ # Loader HTML sotto il pulsante
403
+ loader_html = gr.HTML(LOADER_HTML_OFF)
404
+
405
+ # Funzione wrapper per mostrare/nascondere loader e disabilitare/abilitare il pulsante
406
+ def transcribe_with_loader(*args):
407
+ from gradio import update
408
+ # Mostra loader (display:block)
409
+ yield None, update(interactive=False), None, gr.update(value=LOADER_HTML_ON), update(visible=True), None, gr.update(value=LOADER_HTML_ON)
410
+ # Esegui la funzione vera
411
+ result = transcribe(*args)
412
+ audio_update = result[3] if result[3] is not None else update(visible=False, value=None)
413
+ # Nascondi loader a fine processo (display:none)
414
+ yield result[0], update(interactive=True), result[2], gr.update(value=LOADER_HTML_OFF), audio_update, result[4], gr.update(value=LOADER_HTML_OFF)
415
+
416
+ # Modifica il submit_btn.click per usare la funzione wrapper e i nuovi output
417
  submit_btn.click(
418
+ fn=transcribe_with_loader,
419
  inputs=[
420
  video_input,
421
  audio_output,
 
424
  words_slider,
425
  srt_history_state,
426
  ],
427
+ outputs=[srt_history_state, submit_btn, history_df, loader_html, audio_output, original_audio_path_state, loader_html],
428
+ queue=True,
429
+ show_progress=False,
430
  )
431
 
432
  # Aggiorna il cablaggio eventi per history_df
 
454
  outputs=[srt_editor_accordion] # Rende visibile l'accordion
455
  )
456
 
457
+ # --- FIX: Salva modifiche SRT ---
458
+ save_edit_btn.click(
459
+ fn=lambda srt_path, new_content: (save_srt_changes(srt_path, new_content), gr.update(interactive=False)),
460
+ inputs=[selected_srt_path_state, srt_editor_box],
461
+ outputs=[save_edit_btn],
462
+ )
463
+
464
+ # --- FIX: Abilita/disabilita il pulsante Salva solo se ci sono modifiche ---
465
+ def enable_save_btn(srt_path, new_content):
466
+ if not srt_path or not os.path.exists(srt_path):
467
+ return gr.update(interactive=False)
468
+ try:
469
+ with open(srt_path, 'r', encoding='utf-8') as f:
470
+ original = f.read()
471
+ if original != new_content:
472
+ return gr.update(interactive=True)
473
+ else:
474
+ return gr.update(interactive=False)
475
+ except Exception:
476
+ return gr.update(interactive=False)
477
+
478
+ srt_editor_box.change(
479
+ fn=enable_save_btn,
480
+ inputs=[selected_srt_path_state, srt_editor_box],
481
+ outputs=[save_edit_btn],
482
+ )
483
+ # Disabilita il pulsante Salva quando si seleziona un nuovo file
484
+ edit_btn.click(
485
+ fn=lambda: gr.update(interactive=False),
486
+ inputs=[],
487
+ outputs=[save_edit_btn],
488
+ )
489
  # Aggiorna il cablaggio eventi per merge_btn
490
  merge_btn.click(
491
+ fn=lambda video_path, srt_path: (
492
+ gr.update(visible=True), # Mostra loader
493
+ gr.update(visible=False), # Nascondi il player video
494
+ ),
495
  inputs=[video_input, selected_srt_path_state],
496
+ outputs=[final_video, final_video_loader],
497
+ queue=True,
498
+ show_progress=False,
499
+ )
500
+ merge_btn.click(
501
+ fn=lambda video_path, srt_path: (
502
+ gr.update(visible=True, value=merge_subtitles(video_path, srt_path)[0]), # Mostra video
503
+ gr.update(visible=False), # Nascondi loader
504
+ ),
505
+ inputs=[video_input, selected_srt_path_state],
506
+ outputs=[final_video, final_video_loader],
507
+ queue=True,
508
+ show_progress=False,
509
  )
510
 
511
  # Rende visibile il video finale quando viene cliccato il pulsante
 
515
  outputs=[final_video] # Rende visibile il componente del video finale
516
  )
517
 
518
+ # Riabilita il pulsante 'Genera Sottotitoli' dopo l'unione
519
+ merge_btn.click(
520
+ fn=lambda: gr.update(interactive=True),
521
+ inputs=[],
522
+ outputs=[submit_btn]
523
+ )
524
+
525
  # Aggiorna il cablaggio eventi per delete_btn
526
  delete_btn.click(
527
  fn=delete_selected,
 
529
  outputs=[srt_history_state, action_buttons] # Update history and hide action buttons
530
  )
531
 
532
+ # Download SRT: mostra il file selezionato come download
533
+ download_btn.click(
534
+ fn=lambda srt_path: gr.update(value=srt_path, visible=True) if srt_path and os.path.exists(srt_path) else gr.update(visible=False),
535
+ inputs=[selected_srt_path_state],
536
+ outputs=[srt_download_file],
537
+ )
538
+
539
+ # Download Audio: mostra il file audio corrente come download
540
+ download_audio_btn.click(
541
+ fn=lambda audio_path: gr.update(value=audio_path, visible=True) if audio_path and os.path.exists(audio_path) else gr.update(visible=False),
542
+ inputs=[audio_output],
543
+ outputs=[audio_download_file],
544
+ )
545
+
546
+
547
+ # --- PULIZIA FILE DI OUTPUT ALL'AVVIO ---
548
+ def clean_output_dirs():
549
+ for folder in ["output", os.path.join("output", "subtitles")]:
550
+ if os.path.exists(folder):
551
+ for filename in os.listdir(folder):
552
+ file_path = os.path.join(folder, filename)
553
+ try:
554
+ if os.path.isfile(file_path) or os.path.islink(file_path):
555
+ os.unlink(file_path)
556
+ elif os.path.isdir(file_path):
557
+ shutil.rmtree(file_path)
558
+ except Exception as e:
559
+ print(f"Errore durante la cancellazione di {file_path}: {e}")
560
+
561
+ clean_output_dirs()
562
 
563
  if __name__ == "__main__":
564
  demo.queue().launch() # Rimosso `share=True` per eseguire l'app localmente
manifest.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "version": "1.1.0"
3
  }
 
1
  {
2
+ "version": "1.2.0"
3
  }
output/Audio 2_ Take 2 mp3cut.net_20250623192512.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:305fbc33f6c73d5999d4f95939dda56fc461e5b277cb685cc5ad9bd1f95a42b1
3
+ size 10645622
requirements.txt CHANGED
@@ -2,5 +2,5 @@ gradio
2
  pandas
3
  faster-whisper
4
  moviepy==1.0.3
5
- openai
6
  ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess
 
2
  pandas
3
  faster-whisper
4
  moviepy==1.0.3
5
+ openai>=1.0.0
6
  ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess
src/__pycache__/subtitle_extractor.cpython-313.pyc CHANGED
Binary files a/src/__pycache__/subtitle_extractor.cpython-313.pyc and b/src/__pycache__/subtitle_extractor.cpython-313.pyc differ
 
src/subtitle_extractor.py CHANGED
@@ -8,6 +8,8 @@ import subprocess
8
  from dataclasses import dataclass
9
  from typing import List, Optional
10
 
 
 
11
  # MoviePy is an optional dependency used when extracting audio. It is imported
12
  # lazily to avoid issues when running in environments where it is not
13
  # available (for instance during unit tests).
@@ -19,6 +21,8 @@ except ImportError: # pragma: no cover - optional dependency
19
 
20
  logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
21
 
 
 
22
 
23
  def format_timestamp(seconds: float) -> str:
24
  """Return timestamp in SRT format."""
@@ -63,43 +67,109 @@ def _segments_to_srt(segments: List[SubtitleLine]) -> str:
63
  return "\n".join(lines)
64
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def transcribe_audio(
67
  audio_path: str,
68
  library: str = "faster_whisper",
69
  api_key: Optional[str] = None,
70
  model_size: str = "base",
71
  words_per_sub: int = 7,
72
- ) -> str:
73
- """Transcribe *audio_path* and return SRT content."""
74
  logging.debug(f"Starting transcription with library: {library}, audio_path: {audio_path}")
75
 
 
76
  if library == "OpenAI Whisper":
77
  if api_key is None:
78
  raise ValueError("api_key is required for OpenAI Whisper")
79
  import openai
80
 
81
  openai.api_key = api_key
82
- logging.debug("Calling OpenAI Whisper API...")
83
- with open(audio_path, "rb") as audio_file:
84
- result = openai.Audio.transcribe(
85
- model="whisper-1",
86
- file=audio_file,
87
- response_format="json",
88
- )
89
- logging.debug(f"OpenAI API response: {result}")
90
- words = result.get("text", "").split()
91
- if not words:
92
- logging.error("No text returned by OpenAI Whisper API.")
93
- raise ValueError("No text returned by OpenAI Whisper API.")
94
- segments = []
95
- start = 0.0
96
- step = 3.0
97
- for i in range(0, len(words), words_per_sub):
98
- end = start + step
99
- text = " ".join(words[i : i + words_per_sub])
100
- segments.append(SubtitleLine(start=start, end=end, text=text))
101
- start = end
102
- logging.debug(f"Generated segments: {segments}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  else:
104
  if WhisperModel is None:
105
  raise RuntimeError("faster_whisper is not installed")
@@ -107,6 +177,7 @@ def transcribe_audio(
107
  model = WhisperModel(model_size)
108
  segs = model.transcribe(audio_path)[0]
109
  segments = [SubtitleLine(start=s.start, end=s.end, text=s.text) for s in segs]
 
110
  logging.debug(f"Generated segments: {segments}")
111
 
112
  if not segments:
@@ -115,7 +186,7 @@ def transcribe_audio(
115
 
116
  srt_content = _segments_to_srt(segments)
117
  logging.debug(f"Generated SRT content: {srt_content}")
118
- return srt_content
119
 
120
 
121
  def save_srt(content: str, output_path: str) -> str:
@@ -124,6 +195,12 @@ def save_srt(content: str, output_path: str) -> str:
124
  return output_path
125
 
126
 
 
 
 
 
 
 
127
  def merge_subtitles(video_path: str, srt_path: str, output_path: str) -> str:
128
  command = [
129
  "ffmpeg",
 
8
  from dataclasses import dataclass
9
  from typing import List, Optional
10
 
11
+ from pydub import AudioSegment
12
+
13
  # MoviePy is an optional dependency used when extracting audio. It is imported
14
  # lazily to avoid issues when running in environments where it is not
15
  # available (for instance during unit tests).
 
21
 
22
  logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
23
 
24
+ MAX_OPENAI_AUDIO_SIZE = 25 * 1024 * 1024 # 25 MB
25
+
26
 
27
  def format_timestamp(seconds: float) -> str:
28
  """Return timestamp in SRT format."""
 
67
  return "\n".join(lines)
68
 
69
 
70
def _export_and_transcribe_segment(seg, idx, audio_path, openai, words_per_sub, time_offset):
    """Export *seg* to a temporary MP3 and transcribe it via OpenAI Whisper.

    If the exported MP3 is still larger than MAX_OPENAI_AUDIO_SIZE, the
    segment is halved and each half is processed recursively, so any input
    eventually fits under the API upload limit.

    Args:
        seg: pydub AudioSegment to transcribe.
        idx: label used in the temp-file suffix and log messages.
        audio_path: path of the source audio (unused here; kept for
            signature compatibility with existing callers).
        openai: the imported ``openai`` module, already configured with a key.
        words_per_sub: number of words per generated subtitle line.
        time_offset: seconds to shift this segment's subtitle timestamps by.

    Returns:
        Tuple ``(segments, texts)``: a list of SubtitleLine objects and a
        list of plain-text transcription chunks.
    """
    import tempfile

    segment_list = []
    txt_list = []
    # delete=False keeps the file on disk after close; we remove it ourselves.
    # Closing the handle immediately also makes export/reopen/remove work on
    # Windows, where an open NamedTemporaryFile cannot be reopened or deleted.
    temp_file = tempfile.NamedTemporaryFile(suffix=f"_part{idx}.mp3", delete=False)
    temp_file.close()
    try:
        seg.export(temp_file.name, format="mp3")
        temp_size = os.path.getsize(temp_file.name)
        logging.debug(f"Segmento {idx}: dimensione {temp_size} byte (MP3)")
        if temp_size > MAX_OPENAI_AUDIO_SIZE:
            # Still over the API limit: split in half and recurse on each part.
            logging.info(f"Segmento {idx} ancora troppo grande, suddivisione ricorsiva...")
            mid = len(seg) // 2  # pydub lengths are in milliseconds
            seg1, seg2 = seg[:mid], seg[mid:]
            segs1, txts1 = _export_and_transcribe_segment(
                seg1, f"{idx}a", audio_path, openai, words_per_sub, time_offset
            )
            segs2, txts2 = _export_and_transcribe_segment(
                seg2, f"{idx}b", audio_path, openai, words_per_sub,
                time_offset + seg1.duration_seconds,
            )
            segment_list.extend(segs1)
            segment_list.extend(segs2)
            txt_list.extend(txts1)
            txt_list.extend(txts2)
        else:
            with open(temp_file.name, "rb") as audio_file:
                result = openai.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="json",
                )
            txt_list.append(result.text.strip())
            # The json response carries no word timings, so rebuild fixed
            # 3-second subtitle windows, shifted by *time_offset*.
            words = result.text.split()
            start = time_offset
            step = 3.0
            for i in range(0, len(words), words_per_sub):
                end = start + step
                text = " ".join(words[i : i + words_per_sub])
                segment_list.append(SubtitleLine(start=start, end=end, text=text))
                start = end
    finally:
        # Fixes a temp-file leak: the original removed the MP3 only after a
        # successful transcription, so export/API errors left files behind.
        os.remove(temp_file.name)
    return segment_list, txt_list
115
+
116
+
117
  def transcribe_audio(
118
  audio_path: str,
119
  library: str = "faster_whisper",
120
  api_key: Optional[str] = None,
121
  model_size: str = "base",
122
  words_per_sub: int = 7,
123
+ ) -> tuple[str, str]:
124
+ """Transcribe *audio_path* and return (SRT content, plain text content)."""
125
  logging.debug(f"Starting transcription with library: {library}, audio_path: {audio_path}")
126
 
127
+ plain_text = None
128
  if library == "OpenAI Whisper":
129
  if api_key is None:
130
  raise ValueError("api_key is required for OpenAI Whisper")
131
  import openai
132
 
133
  openai.api_key = api_key
134
+ # --- Gestione file troppo grandi ---
135
+ if os.path.getsize(audio_path) > MAX_OPENAI_AUDIO_SIZE:
136
+ logging.info("Audio troppo grande, suddivisione in segmenti...")
137
+ audio = AudioSegment.from_file(audio_path)
138
+ duration_ms = len(audio)
139
+ segment_length_ms = 20 * 60 * 1000
140
+ segments = [audio[i : i + segment_length_ms] for i in range(0, duration_ms, segment_length_ms)]
141
+ srt_parts = []
142
+ txt_parts = []
143
+ time_offset = 0.0
144
+ for idx, seg in enumerate(segments):
145
+ segs, txts = _export_and_transcribe_segment(seg, idx, audio_path, openai, words_per_sub, time_offset)
146
+ srt_parts.extend(segs)
147
+ txt_parts.extend(txts)
148
+ time_offset += seg.duration_seconds
149
+ segments = srt_parts
150
+ plain_text = " ".join(txt_parts)
151
+ else:
152
+ with open(audio_path, "rb") as audio_file:
153
+ result = openai.audio.transcriptions.create(
154
+ model="whisper-1",
155
+ file=audio_file,
156
+ response_format="json",
157
+ )
158
+ logging.debug(f"OpenAI API response: {result}")
159
+ words = result.text.split()
160
+ plain_text = result.text.strip()
161
+ if not words:
162
+ logging.error("No text returned by OpenAI Whisper API.")
163
+ raise ValueError("No text returned by OpenAI Whisper API.")
164
+ segments = []
165
+ start = 0.0
166
+ step = 3.0
167
+ for i in range(0, len(words), words_per_sub):
168
+ end = start + step
169
+ text = " ".join(words[i : i + words_per_sub])
170
+ segments.append(SubtitleLine(start=start, end=end, text=text))
171
+ start = end
172
+ logging.debug(f"Generated segments: {segments}")
173
  else:
174
  if WhisperModel is None:
175
  raise RuntimeError("faster_whisper is not installed")
 
177
  model = WhisperModel(model_size)
178
  segs = model.transcribe(audio_path)[0]
179
  segments = [SubtitleLine(start=s.start, end=s.end, text=s.text) for s in segs]
180
+ plain_text = " ".join([s.text.strip() for s in segments])
181
  logging.debug(f"Generated segments: {segments}")
182
 
183
  if not segments:
 
186
 
187
  srt_content = _segments_to_srt(segments)
188
  logging.debug(f"Generated SRT content: {srt_content}")
189
+ return srt_content, plain_text
190
 
191
 
192
  def save_srt(content: str, output_path: str) -> str:
 
195
  return output_path
196
 
197
 
198
def save_txt(content: str, output_path: str) -> str:
    """Write *content* to *output_path* as UTF-8 text and return the path."""
    with open(output_path, mode="w", encoding="utf-8") as handle:
        handle.write(content)
    return output_path
202
+
203
+
204
  def merge_subtitles(video_path: str, srt_path: str, output_path: str) -> str:
205
  command = [
206
  "ffmpeg",