Deploy version v1.2.0
Browse files- .gitattributes +1 -0
- Documentation.md +43 -0
- app.py +238 -59
- manifest.json +1 -1
- output/Audio 2_ Take 2 mp3cut.net_20250623192512.wav +3 -0
- requirements.txt +1 -1
- src/__pycache__/subtitle_extractor.cpython-313.pyc +0 -0
- src/subtitle_extractor.py +101 -24
.gitattributes
CHANGED
|
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 36 |
output/romano_subbed_with_romano_faster_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
output/romano_subbed_with_romano_openai_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
output/romano_audio.mp3 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 36 |
output/romano_subbed_with_romano_faster_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
output/romano_subbed_with_romano_openai_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
output/romano_audio.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
output/Audio[[:space:]]2_[[:space:]]Take[[:space:]]2[[:space:]]mp3cut.net_20250623192512.wav filter=lfs diff=lfs merge=lfs -text
|
Documentation.md
CHANGED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Direzione per lo Sviluppo la Gestione e la Sicurezza dei Sistemi Informativi e l'Innovazione Digitale
|
| 2 |
+
|
| 3 |
+
## Transcribe it - Webapp Gradio
|
| 4 |
+
|
| 5 |
+
### Descrizione
|
| 6 |
+
Questa applicazione web consente di estrarre, modificare e scaricare sottotitoli da file video o audio, con una gestione avanzata dello storico, editor audio, player video custom e robusta gestione degli errori. L'interfaccia è realizzata con Gradio e ottimizzata per la massima semplicità d'uso.
|
| 7 |
+
|
| 8 |
+
### Funzionalità principali
|
| 9 |
+
- **Upload di file video o audio**: supporto a formati comuni (mp4, mp3, wav, ecc.).
|
| 10 |
+
- **Estrazione automatica dell'audio dai video**.
|
| 11 |
+
- **Editor audio integrato**: possibilità di modificare la traccia audio prima della generazione sottotitoli.
|
| 12 |
+
- **Generazione sottotitoli**:
|
| 13 |
+
- Supporto a due motori: Faster Whisper (locale) e OpenAI Whisper (cloud, con gestione API Key).
|
| 14 |
+
- Suddivisione automatica dei file audio troppo grandi (>25MB) in segmenti MP3, con trascrizione e ricostruzione automatica.
|
| 15 |
+
- Salvataggio sia del file SRT (con tempi) che del file TXT (solo testo puro).
|
| 16 |
+
- **Storico processi**:
|
| 17 |
+
- Ogni generazione aggiunge due righe: una per il file SRT, una per il TXT.
|
| 18 |
+
- Colonna "Tipologia SRT" per distinguere tra SRT con tempi e testo puro.
|
| 19 |
+
- Cronologia consultabile e azioni rapide (modifica, download, unione video, elimina).
|
| 20 |
+
- **Player video custom**: anteprima del video originale e del video sottotitolato.
|
| 21 |
+
- **Player audio**: sempre visibile dopo upload/estrazione e durante la generazione sottotitoli.
|
| 22 |
+
- **Editor SRT integrato**: modifica e salvataggio diretto dei sottotitoli.
|
| 23 |
+
- **Unione sottotitoli al video**: generazione automatica del video finale con sottotitoli hardcoded.
|
| 24 |
+
- **Download**: scarica file SRT, TXT e audio modificato.
|
| 25 |
+
- **Loader visivi**: spinner e messaggi di caricamento durante tutte le operazioni lunghe (generazione sottotitoli, merge video, caricamento player).
|
| 26 |
+
- **Gestione robusta degli errori**: feedback chiari e nessun crash anche in caso di input non valido o errori API.
|
| 27 |
+
- **Compatibilità multipiattaforma**: funziona su Windows, macOS e Linux.
|
| 28 |
+
|
| 29 |
+
### Requisiti
|
| 30 |
+
- Python 3.8+
|
| 31 |
+
- Dipendenze principali: gradio, moviepy, pydub, pandas, faster-whisper, openai
|
| 32 |
+
|
| 33 |
+
### Avvio rapido
|
| 34 |
+
1. Installa le dipendenze: `pip install -r requirements.txt`
|
| 35 |
+
2. Avvia l'app: `python app.py`
|
| 36 |
+
3. Accedi all'interfaccia web tramite il link fornito in console.
|
| 37 |
+
|
| 38 |
+
### Note di sicurezza
|
| 39 |
+
- La chiave API OpenAI non viene salvata e viene usata solo per la sessione corrente.
|
| 40 |
+
- I file temporanei vengono eliminati automaticamente al termine della sessione.
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
app.py
CHANGED
|
@@ -3,10 +3,10 @@ import os
|
|
| 3 |
import json
|
| 4 |
import logging
|
| 5 |
from moviepy.editor import VideoFileClip, AudioFileClip
|
|
|
|
| 6 |
|
| 7 |
from src.subtitle_extractor import transcribe_audio, save_srt
|
| 8 |
import time
|
| 9 |
-
import shutil
|
| 10 |
import subprocess
|
| 11 |
from datetime import datetime
|
| 12 |
import pandas as pd
|
|
@@ -37,30 +37,20 @@ def format_timestamp(seconds):
|
|
| 37 |
def extract_audio_only(video_path, progress=gr.Progress(track_tqdm=True)):
|
| 38 |
if not video_path:
|
| 39 |
gr.Warning("Carica prima un video per estrarre l'audio.")
|
| 40 |
-
|
| 41 |
-
return None, None, gr.update(visible=False)
|
| 42 |
try:
|
| 43 |
gr.Info("Estrazione audio in corso...")
|
| 44 |
video = VideoFileClip(video_path)
|
| 45 |
-
|
| 46 |
-
output_dir = os.path.join(os.getcwd(), "output") # Salva ancora nella cartella temporanea definita all'inizio
|
| 47 |
os.makedirs(output_dir, exist_ok=True)
|
| 48 |
-
|
| 49 |
base_name = os.path.splitext(os.path.basename(video_path))[0]
|
| 50 |
audio_filename = os.path.join(output_dir, f"{base_name}_audio.mp3")
|
| 51 |
-
|
| 52 |
video.audio.write_audiofile(audio_filename, logger=None)
|
| 53 |
-
|
| 54 |
gr.Info("Estrazione audio completata.")
|
| 55 |
-
|
| 56 |
-
# --- LA RIGA CORRETTA È QUESTA ---
|
| 57 |
-
# Ora restituisce 3 valori: il player, lo stato per l'undo, e la visibilità del gruppo
|
| 58 |
-
return gr.update(value=audio_filename, visible=True), audio_filename, gr.update(visible=True)
|
| 59 |
-
|
| 60 |
except Exception as e:
|
| 61 |
gr.Error(f"Errore durante l'estrazione dell'audio: {e}")
|
| 62 |
-
|
| 63 |
-
return None, None, gr.update(visible=False)
|
| 64 |
def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True)):
|
| 65 |
if not video_path or not srt_path:
|
| 66 |
gr.Warning("Percorso video o sottotitoli mancante!"); return None, None
|
|
@@ -84,42 +74,61 @@ def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, c
|
|
| 84 |
global stop_requested
|
| 85 |
if stop_requested:
|
| 86 |
logging.warning("Transcription stopped by user.")
|
| 87 |
-
return current_history, gr.update(interactive=True), update_dataframe(current_history)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
audio_source = None
|
|
|
|
|
|
|
| 90 |
if edited_audio_path and os.path.exists(edited_audio_path):
|
| 91 |
logging.info("Using edited audio for transcription.")
|
| 92 |
audio_source = edited_audio_path
|
|
|
|
|
|
|
| 93 |
elif video_path and os.path.exists(video_path):
|
| 94 |
logging.info("Extracting audio from original video for transcription...")
|
| 95 |
try:
|
| 96 |
video = VideoFileClip(video_path)
|
| 97 |
-
|
| 98 |
-
video.audio.write_audiofile(
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
except Exception as e:
|
| 101 |
logging.error(f"Error extracting audio: {e}")
|
| 102 |
-
return current_history, gr.update(interactive=True), update_dataframe(current_history)
|
| 103 |
else:
|
| 104 |
logging.error("No valid video or audio source provided.")
|
| 105 |
-
return current_history, gr.update(interactive=True), update_dataframe(current_history)
|
| 106 |
|
| 107 |
try:
|
| 108 |
if library == "OpenAI Whisper":
|
| 109 |
-
if not api_key:
|
| 110 |
-
logging.error("Missing OpenAI API Key.")
|
| 111 |
-
gr.Error("API Key OpenAI mancante.")
|
| 112 |
-
return current_history, gr.update(interactive=True), update_dataframe(current_history)
|
| 113 |
logging.info("Using OpenAI Whisper for transcription.")
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
else:
|
| 121 |
logging.info("Using Faster Whisper for transcription.")
|
| 122 |
-
srt_content = transcribe_audio(
|
| 123 |
audio_source,
|
| 124 |
library="faster_whisper",
|
| 125 |
api_key=None,
|
|
@@ -129,37 +138,51 @@ def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, c
|
|
| 129 |
except Exception as e:
|
| 130 |
logging.error(f"Error during transcription: {e}")
|
| 131 |
gr.Error(f"Errore trascrizione: {e}")
|
| 132 |
-
return current_history, gr.update(interactive=True), update_dataframe(current_history)
|
| 133 |
|
| 134 |
base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
|
| 135 |
-
|
|
|
|
|
|
|
| 136 |
try:
|
| 137 |
save_srt(srt_content, srt_filename)
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
except Exception as e:
|
| 140 |
-
logging.error(f"Error saving SRT file: {e}")
|
| 141 |
-
return current_history, gr.update(interactive=True), update_dataframe(current_history)
|
| 142 |
|
| 143 |
if audio_source.startswith(TEMP_DIR) and os.path.basename(audio_source) == "temp_transcribe_audio.wav":
|
| 144 |
os.remove(audio_source)
|
| 145 |
logging.info("Temporary audio file removed.")
|
| 146 |
|
| 147 |
elapsed_time = time.time() - start_time
|
| 148 |
-
|
| 149 |
"File SRT": os.path.basename(srt_filename),
|
| 150 |
"Libreria": library,
|
| 151 |
-
"
|
| 152 |
"Percorso Completo": srt_filename,
|
| 153 |
"Video Unito": None,
|
| 154 |
"Orario Generazione": datetime.now().strftime("%H:%M:%S"),
|
| 155 |
"Orario Unione": "",
|
| 156 |
}
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
logging.debug(f"Updated history: {updated_history}")
|
| 161 |
|
| 162 |
-
|
|
|
|
| 163 |
|
| 164 |
# ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
|
| 165 |
|
|
@@ -195,12 +218,28 @@ try:
|
|
| 195 |
except FileNotFoundError: VERSION = "1.0.0"
|
| 196 |
BADGE = f"<span style='background:#1976d2;color:white;padding:2px 8px;border-radius:8px;font-size:0.9em;margin-left:8px;'>v{VERSION}</span>"
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<script>{js_loader_script}</script>") as demo:
|
| 199 |
srt_history_state = gr.State([])
|
| 200 |
selected_srt_path_state = gr.State(None)
|
| 201 |
original_audio_path_state = gr.State()
|
| 202 |
|
| 203 |
-
gr.Markdown(f"<h1>
|
| 204 |
|
| 205 |
gr.Markdown("### 1. Carica un file")
|
| 206 |
video_input = gr.File(label="Carica un file video o audio", file_types=["video", "audio"])
|
|
@@ -215,6 +254,8 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 215 |
api_key_input = gr.Textbox(label="API Key OpenAI", type="password", placeholder="sk-...")
|
| 216 |
cost_estimate = gr.Markdown()
|
| 217 |
words_slider = gr.Slider(minimum=6, maximum=15, value=7, step=1, label="Parole per sottotitolo")
|
|
|
|
|
|
|
| 218 |
submit_btn = gr.Button("▶️ Genera Sottotitoli", variant="primary")
|
| 219 |
stop_btn = gr.Button("⏹️ Arresta", variant="stop", visible=False)
|
| 220 |
loader = gr.HTML("""<div id="loader-container" style='text-align:center; display:none; margin-top:1rem;'><div style='display:inline-block; position:relative; width:50px; height:50px;'><svg width='50' height='50' viewBox='0 0 50 50'><circle cx='25' cy='25' r='20' fill='none' stroke='#1976d2' stroke-width='5' stroke-linecap='round' stroke-dasharray='100' stroke-dashoffset='60'><animateTransform attributeName='transform' type='rotate' from='0 25 25' to='360 25 25' dur='1.5s' repeatCount='indefinite'/></circle></svg><div id='timer' style='position:absolute; top:50%; left:50%; transform:translate(-50%,-50%); font-size:0.9em; color:#1976d2;'>0s</div></div></div>""")
|
|
@@ -222,18 +263,28 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 222 |
with gr.Column(scale=2):
|
| 223 |
gr.Markdown("### 3. Anteprima ed Editor")
|
| 224 |
video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
with gr.Column():
|
| 231 |
gr.Markdown("--- \n### 4. Cronologia e Azioni sui Sottotitoli\n*Seleziona una riga per attivare le azioni.*")
|
| 232 |
-
history_df = gr.Dataframe(headers=["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"], interactive=True)
|
| 233 |
with gr.Row(visible=False) as action_buttons:
|
| 234 |
edit_btn = gr.Button("📝 Modifica SRT")
|
| 235 |
merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
|
| 236 |
delete_btn = gr.Button("🗑️ Elimina", variant="stop")
|
|
|
|
|
|
|
| 237 |
with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
|
| 238 |
srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
|
| 239 |
save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
|
|
@@ -242,11 +293,34 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 242 |
|
| 243 |
# MODIFICATA: Logica semplificata e robusta
|
| 244 |
def show_main_controls(file_obj):
|
|
|
|
|
|
|
| 245 |
if file_obj:
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
|
| 251 |
def on_select_srt(history_data, evt: gr.SelectData):
|
| 252 |
if evt.index is None:
|
|
@@ -301,12 +375,16 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 301 |
|
| 302 |
# --- CABLAGGIO EVENTI ---
|
| 303 |
|
| 304 |
-
video_input.upload(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
extract_audio_btn.click(
|
| 307 |
fn=extract_audio_only,
|
| 308 |
inputs=[video_input],
|
| 309 |
-
outputs=[audio_output, original_audio_path_state
|
| 310 |
)
|
| 311 |
|
| 312 |
undo_audio_btn.click(
|
|
@@ -321,8 +399,23 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 321 |
outputs=openai_options,
|
| 322 |
)
|
| 323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
submit_btn.click(
|
| 325 |
-
fn=
|
| 326 |
inputs=[
|
| 327 |
video_input,
|
| 328 |
audio_output,
|
|
@@ -331,7 +424,9 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 331 |
words_slider,
|
| 332 |
srt_history_state,
|
| 333 |
],
|
| 334 |
-
outputs=[srt_history_state, submit_btn, history_df],
|
|
|
|
|
|
|
| 335 |
)
|
| 336 |
|
| 337 |
# Aggiorna il cablaggio eventi per history_df
|
|
@@ -359,11 +454,58 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 359 |
outputs=[srt_editor_accordion] # Rende visibile l'accordion
|
| 360 |
)
|
| 361 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
# Aggiorna il cablaggio eventi per merge_btn
|
| 363 |
merge_btn.click(
|
| 364 |
-
fn=
|
|
|
|
|
|
|
|
|
|
| 365 |
inputs=[video_input, selected_srt_path_state],
|
| 366 |
-
outputs=[final_video
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
)
|
| 368 |
|
| 369 |
# Rende visibile il video finale quando viene cliccato il pulsante
|
|
@@ -373,6 +515,13 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 373 |
outputs=[final_video] # Rende visibile il componente del video finale
|
| 374 |
)
|
| 375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
# Aggiorna il cablaggio eventi per delete_btn
|
| 377 |
delete_btn.click(
|
| 378 |
fn=delete_selected,
|
|
@@ -380,6 +529,36 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
|
|
| 380 |
outputs=[srt_history_state, action_buttons] # Update history and hide action buttons
|
| 381 |
)
|
| 382 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
if __name__ == "__main__":
|
| 385 |
demo.queue().launch() # Rimosso `share=True` per eseguire l'app localmente
|
|
|
|
| 3 |
import json
|
| 4 |
import logging
|
| 5 |
from moviepy.editor import VideoFileClip, AudioFileClip
|
| 6 |
+
import shutil
|
| 7 |
|
| 8 |
from src.subtitle_extractor import transcribe_audio, save_srt
|
| 9 |
import time
|
|
|
|
| 10 |
import subprocess
|
| 11 |
from datetime import datetime
|
| 12 |
import pandas as pd
|
|
|
|
| 37 |
def extract_audio_only(video_path, progress=gr.Progress(track_tqdm=True)):
|
| 38 |
if not video_path:
|
| 39 |
gr.Warning("Carica prima un video per estrarre l'audio.")
|
| 40 |
+
return gr.update(visible=False, value=None), None, gr.update(visible=False, value=None)
|
|
|
|
| 41 |
try:
|
| 42 |
gr.Info("Estrazione audio in corso...")
|
| 43 |
video = VideoFileClip(video_path)
|
| 44 |
+
output_dir = os.path.join(os.getcwd(), "output")
|
|
|
|
| 45 |
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
| 46 |
base_name = os.path.splitext(os.path.basename(video_path))[0]
|
| 47 |
audio_filename = os.path.join(output_dir, f"{base_name}_audio.mp3")
|
|
|
|
| 48 |
video.audio.write_audiofile(audio_filename, logger=None)
|
|
|
|
| 49 |
gr.Info("Estrazione audio completata.")
|
| 50 |
+
return gr.update(value=audio_filename, visible=True), audio_filename, gr.update(visible=True, value=audio_filename)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
except Exception as e:
|
| 52 |
gr.Error(f"Errore durante l'estrazione dell'audio: {e}")
|
| 53 |
+
return gr.update(visible=False, value=None), None, gr.update(visible=False, value=None)
|
|
|
|
| 54 |
def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True)):
|
| 55 |
if not video_path or not srt_path:
|
| 56 |
gr.Warning("Percorso video o sottotitoli mancante!"); return None, None
|
|
|
|
| 74 |
global stop_requested
|
| 75 |
if stop_requested:
|
| 76 |
logging.warning("Transcription stopped by user.")
|
| 77 |
+
return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
|
| 78 |
+
|
| 79 |
+
# --- VALIDAZIONE API KEY ---
|
| 80 |
+
if library == "OpenAI Whisper" and (not api_key or not api_key.strip()):
|
| 81 |
+
gr.Error("Devi inserire la API Key OpenAI per usare questa modalità.")
|
| 82 |
+
return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
|
| 83 |
|
| 84 |
audio_source = None
|
| 85 |
+
audio_editor_update = None
|
| 86 |
+
original_audio_update = None
|
| 87 |
if edited_audio_path and os.path.exists(edited_audio_path):
|
| 88 |
logging.info("Using edited audio for transcription.")
|
| 89 |
audio_source = edited_audio_path
|
| 90 |
+
audio_editor_update = gr.update(value=edited_audio_path, visible=True)
|
| 91 |
+
original_audio_update = edited_audio_path
|
| 92 |
elif video_path and os.path.exists(video_path):
|
| 93 |
logging.info("Extracting audio from original video for transcription...")
|
| 94 |
try:
|
| 95 |
video = VideoFileClip(video_path)
|
| 96 |
+
temp_audio_path = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
|
| 97 |
+
video.audio.write_audiofile(temp_audio_path, logger=None)
|
| 98 |
+
output_dir = os.path.join(os.getcwd(), "output")
|
| 99 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 100 |
+
base_name = os.path.splitext(os.path.basename(video_path))[0]
|
| 101 |
+
audio_filename = os.path.join(output_dir, f"{base_name}_audio_gradio.wav")
|
| 102 |
+
import shutil
|
| 103 |
+
shutil.copy(temp_audio_path, audio_filename)
|
| 104 |
+
rel_audio_path = os.path.relpath(audio_filename, os.getcwd())
|
| 105 |
+
audio_source = audio_filename
|
| 106 |
+
audio_editor_update = gr.update(value=rel_audio_path, visible=True)
|
| 107 |
+
original_audio_update = rel_audio_path
|
| 108 |
except Exception as e:
|
| 109 |
logging.error(f"Error extracting audio: {e}")
|
| 110 |
+
return current_history, gr.update(interactive=True), update_dataframe(current_history), gr.update(visible=False, value=None), None, None
|
| 111 |
else:
|
| 112 |
logging.error("No valid video or audio source provided.")
|
| 113 |
+
return current_history, gr.update(interactive=True), update_dataframe(current_history), gr.update(visible=False, value=None), None, None
|
| 114 |
|
| 115 |
try:
|
| 116 |
if library == "OpenAI Whisper":
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
logging.info("Using OpenAI Whisper for transcription.")
|
| 118 |
+
try:
|
| 119 |
+
srt_content, plain_text = transcribe_audio(
|
| 120 |
+
audio_source,
|
| 121 |
+
library="OpenAI Whisper",
|
| 122 |
+
api_key=api_key,
|
| 123 |
+
words_per_sub=int(words_per_sub),
|
| 124 |
+
)
|
| 125 |
+
except Exception as e:
|
| 126 |
+
logging.error(f"Errore chiamata OpenAI Whisper: {e}")
|
| 127 |
+
gr.Error(f"Errore OpenAI Whisper: {e}")
|
| 128 |
+
return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
|
| 129 |
else:
|
| 130 |
logging.info("Using Faster Whisper for transcription.")
|
| 131 |
+
srt_content, plain_text = transcribe_audio(
|
| 132 |
audio_source,
|
| 133 |
library="faster_whisper",
|
| 134 |
api_key=None,
|
|
|
|
| 138 |
except Exception as e:
|
| 139 |
logging.error(f"Error during transcription: {e}")
|
| 140 |
gr.Error(f"Errore trascrizione: {e}")
|
| 141 |
+
return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
|
| 142 |
|
| 143 |
base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
|
| 144 |
+
engine_suffix = "_openai" if library == "OpenAI Whisper" else "_fasterwhisper"
|
| 145 |
+
srt_filename = os.path.join(TEMP_DIR, f"{base_name}{engine_suffix}.srt")
|
| 146 |
+
txt_filename = os.path.join(TEMP_DIR, f"{base_name}{engine_suffix}.txt")
|
| 147 |
try:
|
| 148 |
save_srt(srt_content, srt_filename)
|
| 149 |
+
from src.subtitle_extractor import save_txt
|
| 150 |
+
save_txt(plain_text, txt_filename)
|
| 151 |
+
logging.info(f"SRT file saved at: {srt_filename}, TXT file saved at: {txt_filename}")
|
| 152 |
except Exception as e:
|
| 153 |
+
logging.error(f"Error saving SRT/TXT file: {e}")
|
| 154 |
+
return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
|
| 155 |
|
| 156 |
if audio_source.startswith(TEMP_DIR) and os.path.basename(audio_source) == "temp_transcribe_audio.wav":
|
| 157 |
os.remove(audio_source)
|
| 158 |
logging.info("Temporary audio file removed.")
|
| 159 |
|
| 160 |
elapsed_time = time.time() - start_time
|
| 161 |
+
new_entry_srt = {
|
| 162 |
"File SRT": os.path.basename(srt_filename),
|
| 163 |
"Libreria": library,
|
| 164 |
+
"Tipologia SRT": "SRT con tempi",
|
| 165 |
"Percorso Completo": srt_filename,
|
| 166 |
"Video Unito": None,
|
| 167 |
"Orario Generazione": datetime.now().strftime("%H:%M:%S"),
|
| 168 |
"Orario Unione": "",
|
| 169 |
}
|
| 170 |
+
new_entry_txt = {
|
| 171 |
+
"File SRT": os.path.basename(txt_filename),
|
| 172 |
+
"Libreria": library,
|
| 173 |
+
"Tipologia SRT": "Testo puro",
|
| 174 |
+
"Percorso Completo": txt_filename,
|
| 175 |
+
"Video Unito": None,
|
| 176 |
+
"Orario Generazione": datetime.now().strftime("%H:%M:%S"),
|
| 177 |
+
"Orario Unione": "",
|
| 178 |
+
}
|
| 179 |
+
updated_history = current_history.copy()
|
| 180 |
+
updated_history.append(new_entry_srt)
|
| 181 |
+
updated_history.append(new_entry_txt)
|
| 182 |
logging.debug(f"Updated history: {updated_history}")
|
| 183 |
|
| 184 |
+
# Riabilita sempre il pulsante dopo la generazione
|
| 185 |
+
return updated_history, gr.update(interactive=True), update_dataframe(updated_history), audio_editor_update, original_audio_update
|
| 186 |
|
| 187 |
# ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
|
| 188 |
|
|
|
|
| 218 |
except FileNotFoundError: VERSION = "1.0.0"
|
| 219 |
BADGE = f"<span style='background:#1976d2;color:white;padding:2px 8px;border-radius:8px;font-size:0.9em;margin-left:8px;'>v{VERSION}</span>"
|
| 220 |
|
| 221 |
+
# Loader HTML come template
|
| 222 |
+
LOADER_HTML_ON = """
|
| 223 |
+
<div id='subtitle-loader' style='display:block;text-align:center;margin-top:0.5em;'>
|
| 224 |
+
<span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
|
| 225 |
+
<span style='color:#1976d2;margin-left:8px;'>Generazione sottotitoli in corso...</span>
|
| 226 |
+
</div>
|
| 227 |
+
<style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
|
| 228 |
+
"""
|
| 229 |
+
LOADER_HTML_OFF = """
|
| 230 |
+
<div id='subtitle-loader' style='display:none;text-align:center;margin-top:0.5em;'>
|
| 231 |
+
<span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
|
| 232 |
+
<span style='color:#1976d2;margin-left:8px;'>Generazione sottotitoli in corso...</span>
|
| 233 |
+
</div>
|
| 234 |
+
<style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
|
| 235 |
+
"""
|
| 236 |
+
|
| 237 |
with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<script>{js_loader_script}</script>") as demo:
|
| 238 |
srt_history_state = gr.State([])
|
| 239 |
selected_srt_path_state = gr.State(None)
|
| 240 |
original_audio_path_state = gr.State()
|
| 241 |
|
| 242 |
+
gr.Markdown(f"<h1>Transcribe Speech {BADGE}</h1>")
|
| 243 |
|
| 244 |
gr.Markdown("### 1. Carica un file")
|
| 245 |
video_input = gr.File(label="Carica un file video o audio", file_types=["video", "audio"])
|
|
|
|
| 254 |
api_key_input = gr.Textbox(label="API Key OpenAI", type="password", placeholder="sk-...")
|
| 255 |
cost_estimate = gr.Markdown()
|
| 256 |
words_slider = gr.Slider(minimum=6, maximum=15, value=7, step=1, label="Parole per sottotitolo")
|
| 257 |
+
# --- LOADER HTML SEMPRE PRESENTE SOPRA IL PULSANTE ---
|
| 258 |
+
loader_html = gr.HTML(LOADER_HTML_OFF)
|
| 259 |
submit_btn = gr.Button("▶️ Genera Sottotitoli", variant="primary")
|
| 260 |
stop_btn = gr.Button("⏹️ Arresta", variant="stop", visible=False)
|
| 261 |
loader = gr.HTML("""<div id="loader-container" style='text-align:center; display:none; margin-top:1rem;'><div style='display:inline-block; position:relative; width:50px; height:50px;'><svg width='50' height='50' viewBox='0 0 50 50'><circle cx='25' cy='25' r='20' fill='none' stroke='#1976d2' stroke-width='5' stroke-linecap='round' stroke-dasharray='100' stroke-dashoffset='60'><animateTransform attributeName='transform' type='rotate' from='0 25 25' to='360 25 25' dur='1.5s' repeatCount='indefinite'/></circle></svg><div id='timer' style='position:absolute; top:50%; left:50%; transform:translate(-50%,-50%); font-size:0.9em; color:#1976d2;'>0s</div></div></div>""")
|
|
|
|
| 263 |
with gr.Column(scale=2):
|
| 264 |
gr.Markdown("### 3. Anteprima ed Editor")
|
| 265 |
video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
|
| 266 |
+
audio_output = gr.Audio(label="Editor Traccia Audio", editable=True, type="filepath", visible=False)
|
| 267 |
+
download_audio_btn = gr.Button("⬇️ Download Audio", variant="primary")
|
| 268 |
+
audio_download_file = gr.File(label="Scarica Audio", visible=False)
|
| 269 |
+
undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
|
| 270 |
+
final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False, visible=False)
|
| 271 |
+
final_video_loader = gr.HTML("""
|
| 272 |
+
<div id='final-video-loader' style='display:none;text-align:center;margin-top:0.5em;'>
|
| 273 |
+
<span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
|
| 274 |
+
<span style='color:#1976d2;margin-left:8px;'>Caricamento video sottotitolato...</span>
|
| 275 |
+
</div>
|
| 276 |
+
<style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
|
| 277 |
+
""")
|
| 278 |
|
| 279 |
with gr.Column():
|
| 280 |
gr.Markdown("--- \n### 4. Cronologia e Azioni sui Sottotitoli\n*Seleziona una riga per attivare le azioni.*")
|
| 281 |
+
history_df = gr.Dataframe(headers=["File SRT", "Libreria", "Tipologia SRT", "Orario Generazione", "Video Unito", "Orario Unione"], interactive=True)
|
| 282 |
with gr.Row(visible=False) as action_buttons:
|
| 283 |
edit_btn = gr.Button("📝 Modifica SRT")
|
| 284 |
merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
|
| 285 |
delete_btn = gr.Button("🗑️ Elimina", variant="stop")
|
| 286 |
+
download_btn = gr.Button("⬇️ Download SRT", variant="primary")
|
| 287 |
+
srt_download_file = gr.File(label="Scarica SRT", visible=False)
|
| 288 |
with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
|
| 289 |
srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
|
| 290 |
save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
|
|
|
|
| 293 |
|
| 294 |
# MODIFICATA: Logica semplificata e robusta
|
| 295 |
def show_main_controls(file_obj):
|
| 296 |
+
import mimetypes
|
| 297 |
+
import shutil
|
| 298 |
if file_obj:
|
| 299 |
+
file_path = file_obj.name
|
| 300 |
+
mime, _ = mimetypes.guess_type(file_path)
|
| 301 |
+
is_video = mime and mime.startswith("video")
|
| 302 |
+
is_audio = mime and mime.startswith("audio")
|
| 303 |
+
video_preview_update = gr.update(visible=is_video, value=file_path if is_video else None)
|
| 304 |
+
submit_btn_update = gr.update(interactive=True)
|
| 305 |
+
main_panel_update = gr.update(visible=True)
|
| 306 |
+
if is_audio:
|
| 307 |
+
output_dir = os.path.join(os.getcwd(), "output")
|
| 308 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 309 |
+
ext = os.path.splitext(file_path)[1].lower()
|
| 310 |
+
if ext not in [".wav", ".mp3", ".flac", ".ogg"]:
|
| 311 |
+
gr.Error("Formato audio non supportato. Usa WAV, MP3, FLAC o OGG.")
|
| 312 |
+
return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False), gr.update(visible=False, value=None)
|
| 313 |
+
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
| 314 |
+
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
| 315 |
+
new_audio_name = f"{base_name}_{timestamp}{ext}"
|
| 316 |
+
new_audio_path = os.path.join(output_dir, new_audio_name)
|
| 317 |
+
shutil.copy(file_path, new_audio_path)
|
| 318 |
+
rel_audio_path = os.path.relpath(new_audio_path, os.getcwd())
|
| 319 |
+
video_preview_update = gr.update(visible=False, value=None)
|
| 320 |
+
audio_output_update = gr.update(value=rel_audio_path, visible=True)
|
| 321 |
+
return video_preview_update, main_panel_update, submit_btn_update, audio_output_update
|
| 322 |
+
return video_preview_update, main_panel_update, submit_btn_update, gr.update(visible=False, value=None)
|
| 323 |
+
return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False), gr.update(visible=False, value=None)
|
| 324 |
|
| 325 |
def on_select_srt(history_data, evt: gr.SelectData):
|
| 326 |
if evt.index is None:
|
|
|
|
| 375 |
|
| 376 |
# --- CABLAGGIO EVENTI ---
|
| 377 |
|
| 378 |
+
video_input.upload(
|
| 379 |
+
fn=show_main_controls,
|
| 380 |
+
inputs=video_input,
|
| 381 |
+
outputs=[video_preview, main_panel, submit_btn, audio_output],
|
| 382 |
+
)
|
| 383 |
|
| 384 |
extract_audio_btn.click(
|
| 385 |
fn=extract_audio_only,
|
| 386 |
inputs=[video_input],
|
| 387 |
+
outputs=[audio_output, original_audio_path_state],
|
| 388 |
)
|
| 389 |
|
| 390 |
undo_audio_btn.click(
|
|
|
|
| 399 |
outputs=openai_options,
|
| 400 |
)
|
| 401 |
|
| 402 |
+
# Loader HTML sotto il pulsante
|
| 403 |
+
loader_html = gr.HTML(LOADER_HTML_OFF)
|
| 404 |
+
|
| 405 |
+
# Funzione wrapper per mostrare/nascondere loader e disabilitare/abilitare il pulsante
|
| 406 |
+
def transcribe_with_loader(*args):
    """Generator wrapper around transcribe() used by submit_btn.click.

    First yield: turn the HTML loader on and disable the submit button.
    Second yield: publish the transcription results and restore the UI.
    The tuple order must match the `outputs=` list wired to submit_btn.click
    (history state, submit button, history table, loader, audio player,
    original-audio state, loader again).
    """
    from gradio import update
    # Show loader (display:block) and lock the submit button while working.
    yield None, update(interactive=False), None, gr.update(value=LOADER_HTML_ON), update(visible=True), None, gr.update(value=LOADER_HTML_ON)
    # Run the real (blocking) transcription.
    result = transcribe(*args)
    # result[3] maps to the audio player component; hide the player when it is None.
    audio_update = result[3] if result[3] is not None else update(visible=False, value=None)
    # Hide loader at the end of the process (display:none) and re-enable the button.
    yield result[0], update(interactive=True), result[2], gr.update(value=LOADER_HTML_OFF), audio_update, result[4], gr.update(value=LOADER_HTML_OFF)
|
| 415 |
+
|
| 416 |
+
# Modifica il submit_btn.click per usare la funzione wrapper e i nuovi output
|
| 417 |
submit_btn.click(
|
| 418 |
+
fn=transcribe_with_loader,
|
| 419 |
inputs=[
|
| 420 |
video_input,
|
| 421 |
audio_output,
|
|
|
|
| 424 |
words_slider,
|
| 425 |
srt_history_state,
|
| 426 |
],
|
| 427 |
+
outputs=[srt_history_state, submit_btn, history_df, loader_html, audio_output, original_audio_path_state, loader_html],
|
| 428 |
+
queue=True,
|
| 429 |
+
show_progress=False,
|
| 430 |
)
|
| 431 |
|
| 432 |
# Aggiorna il cablaggio eventi per history_df
|
|
|
|
| 454 |
outputs=[srt_editor_accordion] # Rende visibile l'accordion
|
| 455 |
)
|
| 456 |
|
| 457 |
+
# --- FIX: Salva modifiche SRT ---
|
| 458 |
+
save_edit_btn.click(
|
| 459 |
+
fn=lambda srt_path, new_content: (save_srt_changes(srt_path, new_content), gr.update(interactive=False)),
|
| 460 |
+
inputs=[selected_srt_path_state, srt_editor_box],
|
| 461 |
+
outputs=[save_edit_btn],
|
| 462 |
+
)
|
| 463 |
+
|
| 464 |
+
# --- FIX: Abilita/disabilita il pulsante Salva solo se ci sono modifiche ---
|
| 465 |
+
def enable_save_btn(srt_path, new_content):
    """Enable the Save button only when the editor text differs from the SRT
    file currently on disk; keep it disabled otherwise or on any read error."""
    disabled = gr.update(interactive=False)
    # No selected file, or the file vanished: nothing to save.
    if not srt_path or not os.path.exists(srt_path):
        return disabled
    try:
        with open(srt_path, 'r', encoding='utf-8') as handle:
            on_disk = handle.read()
    except Exception:
        return disabled
    return gr.update(interactive=on_disk != new_content)
|
| 477 |
+
|
| 478 |
+
srt_editor_box.change(
|
| 479 |
+
fn=enable_save_btn,
|
| 480 |
+
inputs=[selected_srt_path_state, srt_editor_box],
|
| 481 |
+
outputs=[save_edit_btn],
|
| 482 |
+
)
|
| 483 |
+
# Disabilita il pulsante Salva quando si seleziona un nuovo file
|
| 484 |
+
edit_btn.click(
|
| 485 |
+
fn=lambda: gr.update(interactive=False),
|
| 486 |
+
inputs=[],
|
| 487 |
+
outputs=[save_edit_btn],
|
| 488 |
+
)
|
| 489 |
# Aggiorna il cablaggio eventi per merge_btn
|
| 490 |
merge_btn.click(
|
| 491 |
+
fn=lambda video_path, srt_path: (
|
| 492 |
+
gr.update(visible=True), # Mostra loader
|
| 493 |
+
gr.update(visible=False), # Nascondi il player video
|
| 494 |
+
),
|
| 495 |
inputs=[video_input, selected_srt_path_state],
|
| 496 |
+
outputs=[final_video, final_video_loader],
|
| 497 |
+
queue=True,
|
| 498 |
+
show_progress=False,
|
| 499 |
+
)
|
| 500 |
+
merge_btn.click(
|
| 501 |
+
fn=lambda video_path, srt_path: (
|
| 502 |
+
gr.update(visible=True, value=merge_subtitles(video_path, srt_path)[0]), # Mostra video
|
| 503 |
+
gr.update(visible=False), # Nascondi loader
|
| 504 |
+
),
|
| 505 |
+
inputs=[video_input, selected_srt_path_state],
|
| 506 |
+
outputs=[final_video, final_video_loader],
|
| 507 |
+
queue=True,
|
| 508 |
+
show_progress=False,
|
| 509 |
)
|
| 510 |
|
| 511 |
# Rende visibile il video finale quando viene cliccato il pulsante
|
|
|
|
| 515 |
outputs=[final_video] # Rende visibile il componente del video finale
|
| 516 |
)
|
| 517 |
|
| 518 |
+
# Riabilita il pulsante 'Genera Sottotitoli' dopo l'unione
|
| 519 |
+
merge_btn.click(
|
| 520 |
+
fn=lambda: gr.update(interactive=True),
|
| 521 |
+
inputs=[],
|
| 522 |
+
outputs=[submit_btn]
|
| 523 |
+
)
|
| 524 |
+
|
| 525 |
# Aggiorna il cablaggio eventi per delete_btn
|
| 526 |
delete_btn.click(
|
| 527 |
fn=delete_selected,
|
|
|
|
| 529 |
outputs=[srt_history_state, action_buttons] # Update history and hide action buttons
|
| 530 |
)
|
| 531 |
|
| 532 |
+
# Download SRT: mostra il file selezionato come download
|
| 533 |
+
download_btn.click(
|
| 534 |
+
fn=lambda srt_path: gr.update(value=srt_path, visible=True) if srt_path and os.path.exists(srt_path) else gr.update(visible=False),
|
| 535 |
+
inputs=[selected_srt_path_state],
|
| 536 |
+
outputs=[srt_download_file],
|
| 537 |
+
)
|
| 538 |
+
|
| 539 |
+
# Download Audio: mostra il file audio corrente come download
|
| 540 |
+
download_audio_btn.click(
|
| 541 |
+
fn=lambda audio_path: gr.update(value=audio_path, visible=True) if audio_path and os.path.exists(audio_path) else gr.update(visible=False),
|
| 542 |
+
inputs=[audio_output],
|
| 543 |
+
outputs=[audio_download_file],
|
| 544 |
+
)
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
# --- PULIZIA FILE DI OUTPUT ALL'AVVIO ---
|
| 548 |
+
def clean_output_dirs():
    """Empty the output folders ("output" and "output/subtitles") at startup:
    delete regular files and symlinks, remove subdirectories recursively.
    Deletion failures are reported on stdout but never abort startup."""
    for folder in ("output", os.path.join("output", "subtitles")):
        if not os.path.exists(folder):
            continue
        for entry in os.listdir(folder):
            path = os.path.join(folder, entry)
            try:
                # Real directories are removed recursively; everything else
                # (files, symlinks — including symlinks to directories) is unlinked.
                if os.path.isdir(path) and not os.path.islink(path):
                    shutil.rmtree(path)
                elif os.path.isfile(path) or os.path.islink(path):
                    os.unlink(path)
            except Exception as e:
                print(f"Errore durante la cancellazione di {path}: {e}")
|
| 560 |
+
|
| 561 |
+
clean_output_dirs()
|
| 562 |
|
| 563 |
if __name__ == "__main__":
|
| 564 |
demo.queue().launch() # Rimosso `share=True` per eseguire l'app localmente
|
manifest.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
{
|
| 2 |
-
"version": "1.
|
| 3 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"version": "1.2.0"
|
| 3 |
}
|
output/Audio 2_ Take 2 mp3cut.net_20250623192512.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:305fbc33f6c73d5999d4f95939dda56fc461e5b277cb685cc5ad9bd1f95a42b1
|
| 3 |
+
size 10645622
|
requirements.txt
CHANGED
|
@@ -2,5 +2,5 @@ gradio
|
|
| 2 |
pandas
|
| 3 |
faster-whisper
|
| 4 |
moviepy==1.0.3
|
| 5 |
-
openai
|
| 6 |
ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess
|
|
|
|
| 2 |
pandas
|
| 3 |
faster-whisper
|
| 4 |
moviepy==1.0.3
|
| 5 |
+
openai>=1.0.0
|
| 6 |
ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess
|
src/__pycache__/subtitle_extractor.cpython-313.pyc
CHANGED
|
Binary files a/src/__pycache__/subtitle_extractor.cpython-313.pyc and b/src/__pycache__/subtitle_extractor.cpython-313.pyc differ
|
|
|
src/subtitle_extractor.py
CHANGED
|
@@ -8,6 +8,8 @@ import subprocess
|
|
| 8 |
from dataclasses import dataclass
|
| 9 |
from typing import List, Optional
|
| 10 |
|
|
|
|
|
|
|
| 11 |
# MoviePy is an optional dependency used when extracting audio. It is imported
|
| 12 |
# lazily to avoid issues when running in environments where it is not
|
| 13 |
# available (for instance during unit tests).
|
|
@@ -19,6 +21,8 @@ except ImportError: # pragma: no cover - optional dependency
|
|
| 19 |
|
| 20 |
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 21 |
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def format_timestamp(seconds: float) -> str:
|
| 24 |
"""Return timestamp in SRT format."""
|
|
@@ -63,43 +67,109 @@ def _segments_to_srt(segments: List[SubtitleLine]) -> str:
|
|
| 63 |
return "\n".join(lines)
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def transcribe_audio(
|
| 67 |
audio_path: str,
|
| 68 |
library: str = "faster_whisper",
|
| 69 |
api_key: Optional[str] = None,
|
| 70 |
model_size: str = "base",
|
| 71 |
words_per_sub: int = 7,
|
| 72 |
-
) -> str:
|
| 73 |
-
"""Transcribe *audio_path* and return SRT content."""
|
| 74 |
logging.debug(f"Starting transcription with library: {library}, audio_path: {audio_path}")
|
| 75 |
|
|
|
|
| 76 |
if library == "OpenAI Whisper":
|
| 77 |
if api_key is None:
|
| 78 |
raise ValueError("api_key is required for OpenAI Whisper")
|
| 79 |
import openai
|
| 80 |
|
| 81 |
openai.api_key = api_key
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
)
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
else:
|
| 104 |
if WhisperModel is None:
|
| 105 |
raise RuntimeError("faster_whisper is not installed")
|
|
@@ -107,6 +177,7 @@ def transcribe_audio(
|
|
| 107 |
model = WhisperModel(model_size)
|
| 108 |
segs = model.transcribe(audio_path)[0]
|
| 109 |
segments = [SubtitleLine(start=s.start, end=s.end, text=s.text) for s in segs]
|
|
|
|
| 110 |
logging.debug(f"Generated segments: {segments}")
|
| 111 |
|
| 112 |
if not segments:
|
|
@@ -115,7 +186,7 @@ def transcribe_audio(
|
|
| 115 |
|
| 116 |
srt_content = _segments_to_srt(segments)
|
| 117 |
logging.debug(f"Generated SRT content: {srt_content}")
|
| 118 |
-
return srt_content
|
| 119 |
|
| 120 |
|
| 121 |
def save_srt(content: str, output_path: str) -> str:
|
|
@@ -124,6 +195,12 @@ def save_srt(content: str, output_path: str) -> str:
|
|
| 124 |
return output_path
|
| 125 |
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
def merge_subtitles(video_path: str, srt_path: str, output_path: str) -> str:
|
| 128 |
command = [
|
| 129 |
"ffmpeg",
|
|
|
|
| 8 |
from dataclasses import dataclass
|
| 9 |
from typing import List, Optional
|
| 10 |
|
| 11 |
+
from pydub import AudioSegment
|
| 12 |
+
|
| 13 |
# MoviePy is an optional dependency used when extracting audio. It is imported
|
| 14 |
# lazily to avoid issues when running in environments where it is not
|
| 15 |
# available (for instance during unit tests).
|
|
|
|
| 21 |
|
| 22 |
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 23 |
|
| 24 |
+
MAX_OPENAI_AUDIO_SIZE = 25 * 1024 * 1024 # 25 MB
|
| 25 |
+
|
| 26 |
|
| 27 |
def format_timestamp(seconds: float) -> str:
|
| 28 |
"""Return timestamp in SRT format."""
|
|
|
|
| 67 |
return "\n".join(lines)
|
| 68 |
|
| 69 |
|
| 70 |
+
def _export_and_transcribe_segment(seg, idx, audio_path, openai, words_per_sub, time_offset):
    """Export a pydub segment to MP3 and transcribe it via the OpenAI API,
    splitting the segment in half recursively while the exported MP3 exceeds
    MAX_OPENAI_AUDIO_SIZE.

    Parameters:
        seg: pydub.AudioSegment chunk to transcribe.
        idx: label used in the temp-file suffix and log messages ("0", "0a", "0b", ...).
        audio_path: path of the original audio; kept for interface compatibility
            (not used directly here).
        openai: the imported openai module, API key already set by the caller.
        words_per_sub: maximum number of words per generated subtitle line.
        time_offset: absolute start time (seconds) of this segment in the full audio.

    Returns:
        (list[SubtitleLine], list[str]): subtitle segments with absolute
        timestamps, and the plain-text transcriptions of each API call.
    """
    import tempfile
    segment_list = []
    txt_list = []
    # Create the temp file already closed so it can be re-opened and removed
    # safely on every platform: the previous NamedTemporaryFile approach called
    # os.remove() while the handle was still open (fails on Windows) and leaked
    # the file when export/transcription raised.
    fd, tmp_path = tempfile.mkstemp(suffix=f"_part{idx}.mp3")
    os.close(fd)
    try:
        seg.export(tmp_path, format="mp3")
        temp_size = os.path.getsize(tmp_path)
        logging.debug(f"Segmento {idx}: dimensione {temp_size} byte (MP3)")
        if temp_size > MAX_OPENAI_AUDIO_SIZE:
            # Still too large for the API: split in half and recurse on each part.
            logging.info(f"Segmento {idx} ancora troppo grande, suddivisione ricorsiva...")
            mid = len(seg) // 2
            seg1, seg2 = seg[:mid], seg[mid:]
            segs1, txts1 = _export_and_transcribe_segment(
                seg1, f"{idx}a", audio_path, openai, words_per_sub, time_offset
            )
            segs2, txts2 = _export_and_transcribe_segment(
                seg2, f"{idx}b", audio_path, openai, words_per_sub,
                time_offset + seg1.duration_seconds,
            )
            segment_list.extend(segs1 + segs2)
            txt_list.extend(txts1 + txts2)
        else:
            with open(tmp_path, "rb") as audio_file:
                result = openai.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="json",
                )
            words = result.text.split()
            txt_list.append(result.text.strip())
            # Rebuild SRT segments on a fixed 3-second cadence, shifted by this
            # chunk's absolute time offset (the API response carries no timings).
            start = time_offset
            step = 3.0
            for i in range(0, len(words), words_per_sub):
                end = start + step
                segment_list.append(
                    SubtitleLine(start=start, end=end, text=" ".join(words[i : i + words_per_sub]))
                )
                start = end
    finally:
        # Always clean up the temp MP3, even when export/transcription raised.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
    return segment_list, txt_list
|
| 115 |
+
|
| 116 |
+
|
| 117 |
def transcribe_audio(
|
| 118 |
audio_path: str,
|
| 119 |
library: str = "faster_whisper",
|
| 120 |
api_key: Optional[str] = None,
|
| 121 |
model_size: str = "base",
|
| 122 |
words_per_sub: int = 7,
|
| 123 |
+
) -> tuple[str, str]:
|
| 124 |
+
"""Transcribe *audio_path* and return (SRT content, plain text content)."""
|
| 125 |
logging.debug(f"Starting transcription with library: {library}, audio_path: {audio_path}")
|
| 126 |
|
| 127 |
+
plain_text = None
|
| 128 |
if library == "OpenAI Whisper":
|
| 129 |
if api_key is None:
|
| 130 |
raise ValueError("api_key is required for OpenAI Whisper")
|
| 131 |
import openai
|
| 132 |
|
| 133 |
openai.api_key = api_key
|
| 134 |
+
# --- Gestione file troppo grandi ---
|
| 135 |
+
if os.path.getsize(audio_path) > MAX_OPENAI_AUDIO_SIZE:
|
| 136 |
+
logging.info("Audio troppo grande, suddivisione in segmenti...")
|
| 137 |
+
audio = AudioSegment.from_file(audio_path)
|
| 138 |
+
duration_ms = len(audio)
|
| 139 |
+
segment_length_ms = 20 * 60 * 1000
|
| 140 |
+
segments = [audio[i : i + segment_length_ms] for i in range(0, duration_ms, segment_length_ms)]
|
| 141 |
+
srt_parts = []
|
| 142 |
+
txt_parts = []
|
| 143 |
+
time_offset = 0.0
|
| 144 |
+
for idx, seg in enumerate(segments):
|
| 145 |
+
segs, txts = _export_and_transcribe_segment(seg, idx, audio_path, openai, words_per_sub, time_offset)
|
| 146 |
+
srt_parts.extend(segs)
|
| 147 |
+
txt_parts.extend(txts)
|
| 148 |
+
time_offset += seg.duration_seconds
|
| 149 |
+
segments = srt_parts
|
| 150 |
+
plain_text = " ".join(txt_parts)
|
| 151 |
+
else:
|
| 152 |
+
with open(audio_path, "rb") as audio_file:
|
| 153 |
+
result = openai.audio.transcriptions.create(
|
| 154 |
+
model="whisper-1",
|
| 155 |
+
file=audio_file,
|
| 156 |
+
response_format="json",
|
| 157 |
+
)
|
| 158 |
+
logging.debug(f"OpenAI API response: {result}")
|
| 159 |
+
words = result.text.split()
|
| 160 |
+
plain_text = result.text.strip()
|
| 161 |
+
if not words:
|
| 162 |
+
logging.error("No text returned by OpenAI Whisper API.")
|
| 163 |
+
raise ValueError("No text returned by OpenAI Whisper API.")
|
| 164 |
+
segments = []
|
| 165 |
+
start = 0.0
|
| 166 |
+
step = 3.0
|
| 167 |
+
for i in range(0, len(words), words_per_sub):
|
| 168 |
+
end = start + step
|
| 169 |
+
text = " ".join(words[i : i + words_per_sub])
|
| 170 |
+
segments.append(SubtitleLine(start=start, end=end, text=text))
|
| 171 |
+
start = end
|
| 172 |
+
logging.debug(f"Generated segments: {segments}")
|
| 173 |
else:
|
| 174 |
if WhisperModel is None:
|
| 175 |
raise RuntimeError("faster_whisper is not installed")
|
|
|
|
| 177 |
model = WhisperModel(model_size)
|
| 178 |
segs = model.transcribe(audio_path)[0]
|
| 179 |
segments = [SubtitleLine(start=s.start, end=s.end, text=s.text) for s in segs]
|
| 180 |
+
plain_text = " ".join([s.text.strip() for s in segments])
|
| 181 |
logging.debug(f"Generated segments: {segments}")
|
| 182 |
|
| 183 |
if not segments:
|
|
|
|
| 186 |
|
| 187 |
srt_content = _segments_to_srt(segments)
|
| 188 |
logging.debug(f"Generated SRT content: {srt_content}")
|
| 189 |
+
return srt_content, plain_text
|
| 190 |
|
| 191 |
|
| 192 |
def save_srt(content: str, output_path: str) -> str:
|
|
|
|
| 195 |
return output_path
|
| 196 |
|
| 197 |
|
| 198 |
+
def save_txt(content: str, output_path: str) -> str:
    """Write *content* to *output_path* as UTF-8 plain text and return the path."""
    with open(output_path, mode="w", encoding="utf-8") as handle:
        handle.write(content)
    return output_path
|
| 202 |
+
|
| 203 |
+
|
| 204 |
def merge_subtitles(video_path: str, srt_path: str, output_path: str) -> str:
|
| 205 |
command = [
|
| 206 |
"ffmpeg",
|