danzapp70 commited on
Commit
3c11817
·
verified ·
1 Parent(s): 0d6f640

Deploy version v1.2.0

Browse files
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  output/romano_subbed_with_romano_faster_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
37
  output/romano_subbed_with_romano_openai_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
38
  output/romano_audio.mp3 filter=lfs diff=lfs merge=lfs -text
 
 
36
  output/romano_subbed_with_romano_faster_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
37
  output/romano_subbed_with_romano_openai_whisper.mp4 filter=lfs diff=lfs merge=lfs -text
38
  output/romano_audio.mp3 filter=lfs diff=lfs merge=lfs -text
39
+ output/Audio[[:space:]]2_[[:space:]]Take[[:space:]]2[[:space:]]mp3cut.net_20250623192512.wav filter=lfs diff=lfs merge=lfs -text
Documentation.md CHANGED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Direzione per lo Sviluppo la Gestione e la Sicurezza dei Sistemi Informativi e l'Innovazione Digitale
2
+
3
+ ## Transcribe it - Webapp Gradio
4
+
5
+ ### Descrizione
6
+ Questa applicazione web consente di estrarre, modificare e scaricare sottotitoli da file video o audio, con una gestione avanzata dello storico, editor audio, player video custom e robusta gestione degli errori. L'interfaccia è realizzata con Gradio e ottimizzata per la massima semplicità d'uso.
7
+
8
+ ### Funzionalità principali
9
+ - **Upload di file video o audio**: supporto a formati comuni (mp4, mp3, wav, ecc.).
10
+ - **Estrazione automatica dell'audio dai video**.
11
+ - **Editor audio integrato**: possibilità di modificare la traccia audio prima della generazione sottotitoli.
12
+ - **Generazione sottotitoli**:
13
+ - Supporto a due motori: Faster Whisper (locale) e OpenAI Whisper (cloud, con gestione API Key).
14
+ - Suddivisione automatica dei file audio troppo grandi (>25MB) in segmenti MP3, con trascrizione e ricostruzione automatica.
15
+ - Salvataggio sia del file SRT (con tempi) che del file TXT (solo testo puro).
16
+ - **Storico processi**:
17
+ - Ogni generazione aggiunge due righe: una per il file SRT, una per il TXT.
18
+ - Colonna "Tipologia SRT" per distinguere tra SRT con tempi e testo puro.
19
+ - Cronologia consultabile e azioni rapide (modifica, download, unione video, elimina).
20
+ - **Player video custom**: anteprima del video originale e del video sottotitolato.
21
+ - **Player audio**: sempre visibile dopo upload/estrazione e durante la generazione sottotitoli.
22
+ - **Editor SRT integrato**: modifica e salvataggio diretto dei sottotitoli.
23
+ - **Unione sottotitoli al video**: generazione automatica del video finale con sottotitoli hardcoded.
24
+ - **Download**: scarica file SRT, TXT e audio modificato.
25
+ - **Loader visivi**: spinner e messaggi di caricamento durante tutte le operazioni lunghe (generazione sottotitoli, merge video, caricamento player).
26
+ - **Gestione robusta degli errori**: feedback chiari e nessun crash anche in caso di input non valido o errori API.
27
+ - **Compatibilità multipiattaforma**: funziona su Windows, macOS e Linux.
28
+
29
+ ### Requisiti
30
+ - Python 3.8+
31
+ - Dipendenze principali: gradio, moviepy, pydub, pandas, faster-whisper, openai
32
+
33
+ ### Avvio rapido
34
+ 1. Installa le dipendenze: `pip install -r requirements.txt`
35
+ 2. Avvia l'app: `python app.py`
36
+ 3. Accedi all'interfaccia web tramite il link fornito in console.
37
+
38
+ ### Note di sicurezza
39
+ - La chiave API OpenAI non viene salvata e viene usata solo per la sessione corrente.
40
+ - I file temporanei vengono eliminati automaticamente al termine della sessione.
41
+
42
+ ---
43
+
app.py CHANGED
@@ -3,10 +3,10 @@ import os
3
  import json
4
  import logging
5
  from moviepy.editor import VideoFileClip, AudioFileClip
 
6
 
7
  from src.subtitle_extractor import transcribe_audio, save_srt
8
  import time
9
- import shutil
10
  import subprocess
11
  from datetime import datetime
12
  import pandas as pd
@@ -37,30 +37,20 @@ def format_timestamp(seconds):
37
  def extract_audio_only(video_path, progress=gr.Progress(track_tqdm=True)):
38
  if not video_path:
39
  gr.Warning("Carica prima un video per estrarre l'audio.")
40
- # Restituisce 3 valori anche in caso di errore
41
- return None, None, gr.update(visible=False)
42
  try:
43
  gr.Info("Estrazione audio in corso...")
44
  video = VideoFileClip(video_path)
45
-
46
- output_dir = os.path.join(os.getcwd(), "output") # Salva ancora nella cartella temporanea definita all'inizio
47
  os.makedirs(output_dir, exist_ok=True)
48
-
49
  base_name = os.path.splitext(os.path.basename(video_path))[0]
50
  audio_filename = os.path.join(output_dir, f"{base_name}_audio.mp3")
51
-
52
  video.audio.write_audiofile(audio_filename, logger=None)
53
-
54
  gr.Info("Estrazione audio completata.")
55
-
56
- # --- LA RIGA CORRETTA È QUESTA ---
57
- # Ora restituisce 3 valori: il player, lo stato per l'undo, e la visibilità del gruppo
58
- return gr.update(value=audio_filename, visible=True), audio_filename, gr.update(visible=True)
59
-
60
  except Exception as e:
61
  gr.Error(f"Errore durante l'estrazione dell'audio: {e}")
62
- # Restituisce 3 valori anche in caso di eccezione
63
- return None, None, gr.update(visible=False)
64
  def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True)):
65
  if not video_path or not srt_path:
66
  gr.Warning("Percorso video o sottotitoli mancante!"); return None, None
@@ -84,42 +74,61 @@ def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, c
84
  global stop_requested
85
  if stop_requested:
86
  logging.warning("Transcription stopped by user.")
87
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
 
 
 
 
 
88
 
89
  audio_source = None
 
 
90
  if edited_audio_path and os.path.exists(edited_audio_path):
91
  logging.info("Using edited audio for transcription.")
92
  audio_source = edited_audio_path
 
 
93
  elif video_path and os.path.exists(video_path):
94
  logging.info("Extracting audio from original video for transcription...")
95
  try:
96
  video = VideoFileClip(video_path)
97
- audio_source = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
98
- video.audio.write_audiofile(audio_source, logger=None)
99
- logging.info(f"Audio extracted to: {audio_source}")
 
 
 
 
 
 
 
 
 
100
  except Exception as e:
101
  logging.error(f"Error extracting audio: {e}")
102
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
103
  else:
104
  logging.error("No valid video or audio source provided.")
105
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
106
 
107
  try:
108
  if library == "OpenAI Whisper":
109
- if not api_key:
110
- logging.error("Missing OpenAI API Key.")
111
- gr.Error("API Key OpenAI mancante.")
112
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
113
  logging.info("Using OpenAI Whisper for transcription.")
114
- srt_content = transcribe_audio(
115
- audio_source,
116
- library="OpenAI Whisper",
117
- api_key=api_key,
118
- words_per_sub=int(words_per_sub),
119
- )
 
 
 
 
 
120
  else:
121
  logging.info("Using Faster Whisper for transcription.")
122
- srt_content = transcribe_audio(
123
  audio_source,
124
  library="faster_whisper",
125
  api_key=None,
@@ -129,37 +138,51 @@ def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, c
129
  except Exception as e:
130
  logging.error(f"Error during transcription: {e}")
131
  gr.Error(f"Errore trascrizione: {e}")
132
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
133
 
134
  base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
135
- srt_filename = os.path.join(TEMP_DIR, f"{base_name}.srt")
 
 
136
  try:
137
  save_srt(srt_content, srt_filename)
138
- logging.info(f"SRT file saved successfully at: {srt_filename}")
 
 
139
  except Exception as e:
140
- logging.error(f"Error saving SRT file: {e}")
141
- return current_history, gr.update(interactive=True), update_dataframe(current_history)
142
 
143
  if audio_source.startswith(TEMP_DIR) and os.path.basename(audio_source) == "temp_transcribe_audio.wav":
144
  os.remove(audio_source)
145
  logging.info("Temporary audio file removed.")
146
 
147
  elapsed_time = time.time() - start_time
148
- new_entry = {
149
  "File SRT": os.path.basename(srt_filename),
150
  "Libreria": library,
151
- "Tempo Impiegato (s)": f"{elapsed_time:.2f}",
152
  "Percorso Completo": srt_filename,
153
  "Video Unito": None,
154
  "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
155
  "Orario Unione": "",
156
  }
157
- logging.debug(f"Adding new entry to history: {new_entry}")
158
- updated_history = [e for e in current_history if e["File SRT"] != os.path.basename(srt_filename)]
159
- updated_history.append(new_entry)
 
 
 
 
 
 
 
 
 
160
  logging.debug(f"Updated history: {updated_history}")
161
 
162
- return updated_history, gr.update(interactive=False), update_dataframe(updated_history)
 
163
 
164
  # ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
165
 
@@ -195,12 +218,28 @@ try:
195
  except FileNotFoundError: VERSION = "1.0.0"
196
  BADGE = f"<span style='background:#1976d2;color:white;padding:2px 8px;border-radius:8px;font-size:0.9em;margin-left:8px;'>v{VERSION}</span>"
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<script>{js_loader_script}</script>") as demo:
199
  srt_history_state = gr.State([])
200
  selected_srt_path_state = gr.State(None)
201
  original_audio_path_state = gr.State()
202
 
203
- gr.Markdown(f"<h1>Estrattore Sottotitoli {BADGE}</h1>")
204
 
205
  gr.Markdown("### 1. Carica un file")
206
  video_input = gr.File(label="Carica un file video o audio", file_types=["video", "audio"])
@@ -215,6 +254,8 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
215
  api_key_input = gr.Textbox(label="API Key OpenAI", type="password", placeholder="sk-...")
216
  cost_estimate = gr.Markdown()
217
  words_slider = gr.Slider(minimum=6, maximum=15, value=7, step=1, label="Parole per sottotitolo")
 
 
218
  submit_btn = gr.Button("▶️ Genera Sottotitoli", variant="primary")
219
  stop_btn = gr.Button("⏹️ Arresta", variant="stop", visible=False)
220
  loader = gr.HTML("""<div id="loader-container" style='text-align:center; display:none; margin-top:1rem;'><div style='display:inline-block; position:relative; width:50px; height:50px;'><svg width='50' height='50' viewBox='0 0 50 50'><circle cx='25' cy='25' r='20' fill='none' stroke='#1976d2' stroke-width='5' stroke-linecap='round' stroke-dasharray='100' stroke-dashoffset='60'><animateTransform attributeName='transform' type='rotate' from='0 25 25' to='360 25 25' dur='1.5s' repeatCount='indefinite'/></circle></svg><div id='timer' style='position:absolute; top:50%; left:50%; transform:translate(-50%,-50%); font-size:0.9em; color:#1976d2;'>0s</div></div></div>""")
@@ -222,18 +263,28 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
222
  with gr.Column(scale=2):
223
  gr.Markdown("### 3. Anteprima ed Editor")
224
  video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
225
- with gr.Group(visible=False) as audio_editor_group:
226
- audio_output = gr.Audio(label="Editor Traccia Audio", editable=True, type="filepath")
227
- undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
228
- final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False)
 
 
 
 
 
 
 
 
229
 
230
  with gr.Column():
231
  gr.Markdown("--- \n### 4. Cronologia e Azioni sui Sottotitoli\n*Seleziona una riga per attivare le azioni.*")
232
- history_df = gr.Dataframe(headers=["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"], interactive=True)
233
  with gr.Row(visible=False) as action_buttons:
234
  edit_btn = gr.Button("📝 Modifica SRT")
235
  merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
236
  delete_btn = gr.Button("🗑️ Elimina", variant="stop")
 
 
237
  with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
238
  srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
239
  save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
@@ -242,11 +293,34 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
242
 
243
  # MODIFICATA: Logica semplificata e robusta
244
  def show_main_controls(file_obj):
 
 
245
  if file_obj:
246
- # Se un file viene caricato, mostra il pannello principale e l'anteprima
247
- return gr.update(visible=True, value=file_obj.name), gr.update(visible=True), gr.update(interactive=True)
248
- # Se il file viene cancellato, nascondi tutto
249
- return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
  def on_select_srt(history_data, evt: gr.SelectData):
252
  if evt.index is None:
@@ -301,12 +375,16 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
301
 
302
  # --- CABLAGGIO EVENTI ---
303
 
304
- video_input.upload(fn=show_main_controls, inputs=video_input, outputs=[video_preview, main_panel, submit_btn])
 
 
 
 
305
 
306
  extract_audio_btn.click(
307
  fn=extract_audio_only,
308
  inputs=[video_input],
309
- outputs=[audio_output, original_audio_path_state, audio_editor_group],
310
  )
311
 
312
  undo_audio_btn.click(
@@ -321,8 +399,23 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
321
  outputs=openai_options,
322
  )
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  submit_btn.click(
325
- fn=transcribe,
326
  inputs=[
327
  video_input,
328
  audio_output,
@@ -331,7 +424,9 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
331
  words_slider,
332
  srt_history_state,
333
  ],
334
- outputs=[srt_history_state, submit_btn, history_df],
 
 
335
  )
336
 
337
  # Aggiorna il cablaggio eventi per history_df
@@ -359,11 +454,58 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
359
  outputs=[srt_editor_accordion] # Rende visibile l'accordion
360
  )
361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  # Aggiorna il cablaggio eventi per merge_btn
363
  merge_btn.click(
364
- fn=merge_subtitles,
 
 
 
365
  inputs=[video_input, selected_srt_path_state],
366
- outputs=[final_video] # Aggiorna solo il contenuto del video finale
 
 
 
 
 
 
 
 
 
 
 
 
367
  )
368
 
369
  # Rende visibile il video finale quando viene cliccato il pulsante
@@ -373,6 +515,13 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
373
  outputs=[final_video] # Rende visibile il componente del video finale
374
  )
375
 
 
 
 
 
 
 
 
376
  # Aggiorna il cablaggio eventi per delete_btn
377
  delete_btn.click(
378
  fn=delete_selected,
@@ -380,6 +529,36 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
380
  outputs=[srt_history_state, action_buttons] # Update history and hide action buttons
381
  )
382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
  if __name__ == "__main__":
385
  demo.queue().launch() # Rimosso `share=True` per eseguire l'app localmente
 
3
  import json
4
  import logging
5
  from moviepy.editor import VideoFileClip, AudioFileClip
6
+ import shutil
7
 
8
  from src.subtitle_extractor import transcribe_audio, save_srt
9
  import time
 
10
  import subprocess
11
  from datetime import datetime
12
  import pandas as pd
 
37
  def extract_audio_only(video_path, progress=gr.Progress(track_tqdm=True)):
38
  if not video_path:
39
  gr.Warning("Carica prima un video per estrarre l'audio.")
40
+ return gr.update(visible=False, value=None), None, gr.update(visible=False, value=None)
 
41
  try:
42
  gr.Info("Estrazione audio in corso...")
43
  video = VideoFileClip(video_path)
44
+ output_dir = os.path.join(os.getcwd(), "output")
 
45
  os.makedirs(output_dir, exist_ok=True)
 
46
  base_name = os.path.splitext(os.path.basename(video_path))[0]
47
  audio_filename = os.path.join(output_dir, f"{base_name}_audio.mp3")
 
48
  video.audio.write_audiofile(audio_filename, logger=None)
 
49
  gr.Info("Estrazione audio completata.")
50
+ return gr.update(value=audio_filename, visible=True), audio_filename, gr.update(visible=True, value=audio_filename)
 
 
 
 
51
  except Exception as e:
52
  gr.Error(f"Errore durante l'estrazione dell'audio: {e}")
53
+ return gr.update(visible=False, value=None), None, gr.update(visible=False, value=None)
 
54
  def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True)):
55
  if not video_path or not srt_path:
56
  gr.Warning("Percorso video o sottotitoli mancante!"); return None, None
 
74
  global stop_requested
75
  if stop_requested:
76
  logging.warning("Transcription stopped by user.")
77
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
78
+
79
+ # --- VALIDAZIONE API KEY ---
80
+ if library == "OpenAI Whisper" and (not api_key or not api_key.strip()):
81
+ gr.Error("Devi inserire la API Key OpenAI per usare questa modalità.")
82
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
83
 
84
  audio_source = None
85
+ audio_editor_update = None
86
+ original_audio_update = None
87
  if edited_audio_path and os.path.exists(edited_audio_path):
88
  logging.info("Using edited audio for transcription.")
89
  audio_source = edited_audio_path
90
+ audio_editor_update = gr.update(value=edited_audio_path, visible=True)
91
+ original_audio_update = edited_audio_path
92
  elif video_path and os.path.exists(video_path):
93
  logging.info("Extracting audio from original video for transcription...")
94
  try:
95
  video = VideoFileClip(video_path)
96
+ temp_audio_path = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
97
+ video.audio.write_audiofile(temp_audio_path, logger=None)
98
+ output_dir = os.path.join(os.getcwd(), "output")
99
+ os.makedirs(output_dir, exist_ok=True)
100
+ base_name = os.path.splitext(os.path.basename(video_path))[0]
101
+ audio_filename = os.path.join(output_dir, f"{base_name}_audio_gradio.wav")
102
+ import shutil
103
+ shutil.copy(temp_audio_path, audio_filename)
104
+ rel_audio_path = os.path.relpath(audio_filename, os.getcwd())
105
+ audio_source = audio_filename
106
+ audio_editor_update = gr.update(value=rel_audio_path, visible=True)
107
+ original_audio_update = rel_audio_path
108
  except Exception as e:
109
  logging.error(f"Error extracting audio: {e}")
110
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), gr.update(visible=False, value=None), None, None
111
  else:
112
  logging.error("No valid video or audio source provided.")
113
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), gr.update(visible=False, value=None), None, None
114
 
115
  try:
116
  if library == "OpenAI Whisper":
 
 
 
 
117
  logging.info("Using OpenAI Whisper for transcription.")
118
+ try:
119
+ srt_content, plain_text = transcribe_audio(
120
+ audio_source,
121
+ library="OpenAI Whisper",
122
+ api_key=api_key,
123
+ words_per_sub=int(words_per_sub),
124
+ )
125
+ except Exception as e:
126
+ logging.error(f"Errore chiamata OpenAI Whisper: {e}")
127
+ gr.Error(f"Errore OpenAI Whisper: {e}")
128
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
129
  else:
130
  logging.info("Using Faster Whisper for transcription.")
131
+ srt_content, plain_text = transcribe_audio(
132
  audio_source,
133
  library="faster_whisper",
134
  api_key=None,
 
138
  except Exception as e:
139
  logging.error(f"Error during transcription: {e}")
140
  gr.Error(f"Errore trascrizione: {e}")
141
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
142
 
143
  base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
144
+ engine_suffix = "_openai" if library == "OpenAI Whisper" else "_fasterwhisper"
145
+ srt_filename = os.path.join(TEMP_DIR, f"{base_name}{engine_suffix}.srt")
146
+ txt_filename = os.path.join(TEMP_DIR, f"{base_name}{engine_suffix}.txt")
147
  try:
148
  save_srt(srt_content, srt_filename)
149
+ from src.subtitle_extractor import save_txt
150
+ save_txt(plain_text, txt_filename)
151
+ logging.info(f"SRT file saved at: {srt_filename}, TXT file saved at: {txt_filename}")
152
  except Exception as e:
153
+ logging.error(f"Error saving SRT/TXT file: {e}")
154
+ return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
155
 
156
  if audio_source.startswith(TEMP_DIR) and os.path.basename(audio_source) == "temp_transcribe_audio.wav":
157
  os.remove(audio_source)
158
  logging.info("Temporary audio file removed.")
159
 
160
  elapsed_time = time.time() - start_time
161
+ new_entry_srt = {
162
  "File SRT": os.path.basename(srt_filename),
163
  "Libreria": library,
164
+ "Tipologia SRT": "SRT con tempi",
165
  "Percorso Completo": srt_filename,
166
  "Video Unito": None,
167
  "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
168
  "Orario Unione": "",
169
  }
170
+ new_entry_txt = {
171
+ "File SRT": os.path.basename(txt_filename),
172
+ "Libreria": library,
173
+ "Tipologia SRT": "Testo puro",
174
+ "Percorso Completo": txt_filename,
175
+ "Video Unito": None,
176
+ "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
177
+ "Orario Unione": "",
178
+ }
179
+ updated_history = current_history.copy()
180
+ updated_history.append(new_entry_srt)
181
+ updated_history.append(new_entry_txt)
182
  logging.debug(f"Updated history: {updated_history}")
183
 
184
+ # Riabilita sempre il pulsante dopo la generazione
185
+ return updated_history, gr.update(interactive=True), update_dataframe(updated_history), audio_editor_update, original_audio_update
186
 
187
  # ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
188
 
 
218
  except FileNotFoundError: VERSION = "1.0.0"
219
  BADGE = f"<span style='background:#1976d2;color:white;padding:2px 8px;border-radius:8px;font-size:0.9em;margin-left:8px;'>v{VERSION}</span>"
220
 
221
+ # Loader HTML come template
222
+ LOADER_HTML_ON = """
223
+ <div id='subtitle-loader' style='display:block;text-align:center;margin-top:0.5em;'>
224
+ <span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
225
+ <span style='color:#1976d2;margin-left:8px;'>Generazione sottotitoli in corso...</span>
226
+ </div>
227
+ <style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
228
+ """
229
+ LOADER_HTML_OFF = """
230
+ <div id='subtitle-loader' style='display:none;text-align:center;margin-top:0.5em;'>
231
+ <span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
232
+ <span style='color:#1976d2;margin-left:8px;'>Generazione sottotitoli in corso...</span>
233
+ </div>
234
+ <style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
235
+ """
236
+
237
  with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<script>{js_loader_script}</script>") as demo:
238
  srt_history_state = gr.State([])
239
  selected_srt_path_state = gr.State(None)
240
  original_audio_path_state = gr.State()
241
 
242
+ gr.Markdown(f"<h1>Transcribe Speech {BADGE}</h1>")
243
 
244
  gr.Markdown("### 1. Carica un file")
245
  video_input = gr.File(label="Carica un file video o audio", file_types=["video", "audio"])
 
254
  api_key_input = gr.Textbox(label="API Key OpenAI", type="password", placeholder="sk-...")
255
  cost_estimate = gr.Markdown()
256
  words_slider = gr.Slider(minimum=6, maximum=15, value=7, step=1, label="Parole per sottotitolo")
257
+ # --- LOADER HTML SEMPRE PRESENTE SOPRA IL PULSANTE ---
258
+ loader_html = gr.HTML(LOADER_HTML_OFF)
259
  submit_btn = gr.Button("▶️ Genera Sottotitoli", variant="primary")
260
  stop_btn = gr.Button("⏹️ Arresta", variant="stop", visible=False)
261
  loader = gr.HTML("""<div id="loader-container" style='text-align:center; display:none; margin-top:1rem;'><div style='display:inline-block; position:relative; width:50px; height:50px;'><svg width='50' height='50' viewBox='0 0 50 50'><circle cx='25' cy='25' r='20' fill='none' stroke='#1976d2' stroke-width='5' stroke-linecap='round' stroke-dasharray='100' stroke-dashoffset='60'><animateTransform attributeName='transform' type='rotate' from='0 25 25' to='360 25 25' dur='1.5s' repeatCount='indefinite'/></circle></svg><div id='timer' style='position:absolute; top:50%; left:50%; transform:translate(-50%,-50%); font-size:0.9em; color:#1976d2;'>0s</div></div></div>""")
 
263
  with gr.Column(scale=2):
264
  gr.Markdown("### 3. Anteprima ed Editor")
265
  video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
266
+ audio_output = gr.Audio(label="Editor Traccia Audio", editable=True, type="filepath", visible=False)
267
+ download_audio_btn = gr.Button("⬇️ Download Audio", variant="primary")
268
+ audio_download_file = gr.File(label="Scarica Audio", visible=False)
269
+ undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
270
+ final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False, visible=False)
271
+ final_video_loader = gr.HTML("""
272
+ <div id='final-video-loader' style='display:none;text-align:center;margin-top:0.5em;'>
273
+ <span style='display:inline-block;width:24px;height:24px;border:3px solid #1976d2;border-radius:50%;border-top:3px solid transparent;animation:spin 1s linear infinite;vertical-align:middle;'></span>
274
+ <span style='color:#1976d2;margin-left:8px;'>Caricamento video sottotitolato...</span>
275
+ </div>
276
+ <style>@keyframes spin{0%{transform:rotate(0deg);}100%{transform:rotate(360deg);}}</style>
277
+ """)
278
 
279
  with gr.Column():
280
  gr.Markdown("--- \n### 4. Cronologia e Azioni sui Sottotitoli\n*Seleziona una riga per attivare le azioni.*")
281
+ history_df = gr.Dataframe(headers=["File SRT", "Libreria", "Tipologia SRT", "Orario Generazione", "Video Unito", "Orario Unione"], interactive=True)
282
  with gr.Row(visible=False) as action_buttons:
283
  edit_btn = gr.Button("📝 Modifica SRT")
284
  merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
285
  delete_btn = gr.Button("🗑️ Elimina", variant="stop")
286
+ download_btn = gr.Button("⬇️ Download SRT", variant="primary")
287
+ srt_download_file = gr.File(label="Scarica SRT", visible=False)
288
  with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
289
  srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
290
  save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
 
293
 
294
  # MODIFICATA: Logica semplificata e robusta
295
  def show_main_controls(file_obj):
296
+ import mimetypes
297
+ import shutil
298
  if file_obj:
299
+ file_path = file_obj.name
300
+ mime, _ = mimetypes.guess_type(file_path)
301
+ is_video = mime and mime.startswith("video")
302
+ is_audio = mime and mime.startswith("audio")
303
+ video_preview_update = gr.update(visible=is_video, value=file_path if is_video else None)
304
+ submit_btn_update = gr.update(interactive=True)
305
+ main_panel_update = gr.update(visible=True)
306
+ if is_audio:
307
+ output_dir = os.path.join(os.getcwd(), "output")
308
+ os.makedirs(output_dir, exist_ok=True)
309
+ ext = os.path.splitext(file_path)[1].lower()
310
+ if ext not in [".wav", ".mp3", ".flac", ".ogg"]:
311
+ gr.Error("Formato audio non supportato. Usa WAV, MP3, FLAC o OGG.")
312
+ return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False), gr.update(visible=False, value=None)
313
+ base_name = os.path.splitext(os.path.basename(file_path))[0]
314
+ timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
315
+ new_audio_name = f"{base_name}_{timestamp}{ext}"
316
+ new_audio_path = os.path.join(output_dir, new_audio_name)
317
+ shutil.copy(file_path, new_audio_path)
318
+ rel_audio_path = os.path.relpath(new_audio_path, os.getcwd())
319
+ video_preview_update = gr.update(visible=False, value=None)
320
+ audio_output_update = gr.update(value=rel_audio_path, visible=True)
321
+ return video_preview_update, main_panel_update, submit_btn_update, audio_output_update
322
+ return video_preview_update, main_panel_update, submit_btn_update, gr.update(visible=False, value=None)
323
+ return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False), gr.update(visible=False, value=None)
324
 
325
  def on_select_srt(history_data, evt: gr.SelectData):
326
  if evt.index is None:
 
375
 
376
  # --- CABLAGGIO EVENTI ---
377
 
378
+ video_input.upload(
379
+ fn=show_main_controls,
380
+ inputs=video_input,
381
+ outputs=[video_preview, main_panel, submit_btn, audio_output],
382
+ )
383
 
384
  extract_audio_btn.click(
385
  fn=extract_audio_only,
386
  inputs=[video_input],
387
+ outputs=[audio_output, original_audio_path_state],
388
  )
389
 
390
  undo_audio_btn.click(
 
399
  outputs=openai_options,
400
  )
401
 
402
+ # Loader HTML sotto il pulsante
403
+ loader_html = gr.HTML(LOADER_HTML_OFF)
404
+
405
+ # Funzione wrapper per mostrare/nascondere loader e disabilitare/abilitare il pulsante
406
+ def transcribe_with_loader(*args):
407
+ from gradio import update
408
+ # Mostra loader (display:block)
409
+ yield None, update(interactive=False), None, gr.update(value=LOADER_HTML_ON), update(visible=True), None, gr.update(value=LOADER_HTML_ON)
410
+ # Esegui la funzione vera
411
+ result = transcribe(*args)
412
+ audio_update = result[3] if result[3] is not None else update(visible=False, value=None)
413
+ # Nascondi loader a fine processo (display:none)
414
+ yield result[0], update(interactive=True), result[2], gr.update(value=LOADER_HTML_OFF), audio_update, result[4], gr.update(value=LOADER_HTML_OFF)
415
+
416
+ # Modifica il submit_btn.click per usare la funzione wrapper e i nuovi output
417
  submit_btn.click(
418
+ fn=transcribe_with_loader,
419
  inputs=[
420
  video_input,
421
  audio_output,
 
424
  words_slider,
425
  srt_history_state,
426
  ],
427
+ outputs=[srt_history_state, submit_btn, history_df, loader_html, audio_output, original_audio_path_state, loader_html],
428
+ queue=True,
429
+ show_progress=False,
430
  )
431
 
432
  # Aggiorna il cablaggio eventi per history_df
 
454
  outputs=[srt_editor_accordion] # Rende visibile l'accordion
455
  )
456
 
457
+ # --- FIX: Salva modifiche SRT ---
458
+ save_edit_btn.click(
459
+ fn=lambda srt_path, new_content: (save_srt_changes(srt_path, new_content), gr.update(interactive=False)),
460
+ inputs=[selected_srt_path_state, srt_editor_box],
461
+ outputs=[save_edit_btn],
462
+ )
463
+
464
+ # --- FIX: Abilita/disabilita il pulsante Salva solo se ci sono modifiche ---
465
+ def enable_save_btn(srt_path, new_content):
466
+ if not srt_path or not os.path.exists(srt_path):
467
+ return gr.update(interactive=False)
468
+ try:
469
+ with open(srt_path, 'r', encoding='utf-8') as f:
470
+ original = f.read()
471
+ if original != new_content:
472
+ return gr.update(interactive=True)
473
+ else:
474
+ return gr.update(interactive=False)
475
+ except Exception:
476
+ return gr.update(interactive=False)
477
+
478
+ srt_editor_box.change(
479
+ fn=enable_save_btn,
480
+ inputs=[selected_srt_path_state, srt_editor_box],
481
+ outputs=[save_edit_btn],
482
+ )
483
+ # Disabilita il pulsante Salva quando si seleziona un nuovo file
484
+ edit_btn.click(
485
+ fn=lambda: gr.update(interactive=False),
486
+ inputs=[],
487
+ outputs=[save_edit_btn],
488
+ )
489
  # Aggiorna il cablaggio eventi per merge_btn
490
  merge_btn.click(
491
+ fn=lambda video_path, srt_path: (
492
+ gr.update(visible=True), # Mostra loader
493
+ gr.update(visible=False), # Nascondi il player video
494
+ ),
495
  inputs=[video_input, selected_srt_path_state],
496
+ outputs=[final_video, final_video_loader],
497
+ queue=True,
498
+ show_progress=False,
499
+ )
500
+ merge_btn.click(
501
+ fn=lambda video_path, srt_path: (
502
+ gr.update(visible=True, value=merge_subtitles(video_path, srt_path)[0]), # Mostra video
503
+ gr.update(visible=False), # Nascondi loader
504
+ ),
505
+ inputs=[video_input, selected_srt_path_state],
506
+ outputs=[final_video, final_video_loader],
507
+ queue=True,
508
+ show_progress=False,
509
  )
510
 
511
  # Rende visibile il video finale quando viene cliccato il pulsante
 
515
  outputs=[final_video] # Rende visibile il componente del video finale
516
  )
517
 
518
+ # Riabilita il pulsante 'Genera Sottotitoli' dopo l'unione
519
+ merge_btn.click(
520
+ fn=lambda: gr.update(interactive=True),
521
+ inputs=[],
522
+ outputs=[submit_btn]
523
+ )
524
+
525
  # Aggiorna il cablaggio eventi per delete_btn
526
  delete_btn.click(
527
  fn=delete_selected,
 
529
  outputs=[srt_history_state, action_buttons] # Update history and hide action buttons
530
  )
531
 
532
+ # Download SRT: mostra il file selezionato come download
533
+ download_btn.click(
534
+ fn=lambda srt_path: gr.update(value=srt_path, visible=True) if srt_path and os.path.exists(srt_path) else gr.update(visible=False),
535
+ inputs=[selected_srt_path_state],
536
+ outputs=[srt_download_file],
537
+ )
538
+
539
+ # Download Audio: mostra il file audio corrente come download
540
+ download_audio_btn.click(
541
+ fn=lambda audio_path: gr.update(value=audio_path, visible=True) if audio_path and os.path.exists(audio_path) else gr.update(visible=False),
542
+ inputs=[audio_output],
543
+ outputs=[audio_download_file],
544
+ )
545
+
546
+
547
+ # --- PULIZIA FILE DI OUTPUT ALL'AVVIO ---
548
+ def clean_output_dirs():
549
+ for folder in ["output", os.path.join("output", "subtitles")]:
550
+ if os.path.exists(folder):
551
+ for filename in os.listdir(folder):
552
+ file_path = os.path.join(folder, filename)
553
+ try:
554
+ if os.path.isfile(file_path) or os.path.islink(file_path):
555
+ os.unlink(file_path)
556
+ elif os.path.isdir(file_path):
557
+ shutil.rmtree(file_path)
558
+ except Exception as e:
559
+ print(f"Errore durante la cancellazione di {file_path}: {e}")
560
+
561
+ clean_output_dirs()
562
 
563
  if __name__ == "__main__":
564
  demo.queue().launch() # Rimosso `share=True` per eseguire l'app localmente
manifest.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "version": "1.1.0"
3
  }
 
1
  {
2
+ "version": "1.2.0"
3
  }
output/Audio 2_ Take 2 mp3cut.net_20250623192512.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:305fbc33f6c73d5999d4f95939dda56fc461e5b277cb685cc5ad9bd1f95a42b1
3
+ size 10645622
requirements.txt CHANGED
@@ -2,5 +2,5 @@ gradio
2
  pandas
3
  faster-whisper
4
  moviepy==1.0.3
5
- openai
6
  ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess
 
2
  pandas
3
  faster-whisper
4
  moviepy==1.0.3
5
+ openai>=1.0.0
6
  ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess
src/__pycache__/subtitle_extractor.cpython-313.pyc CHANGED
Binary files a/src/__pycache__/subtitle_extractor.cpython-313.pyc and b/src/__pycache__/subtitle_extractor.cpython-313.pyc differ
 
src/subtitle_extractor.py CHANGED
@@ -8,6 +8,8 @@ import subprocess
8
  from dataclasses import dataclass
9
  from typing import List, Optional
10
 
 
 
11
  # MoviePy is an optional dependency used when extracting audio. It is imported
12
  # lazily to avoid issues when running in environments where it is not
13
  # available (for instance during unit tests).
@@ -19,6 +21,8 @@ except ImportError: # pragma: no cover - optional dependency
19
 
20
  logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
21
 
 
 
22
 
23
  def format_timestamp(seconds: float) -> str:
24
  """Return timestamp in SRT format."""
@@ -63,43 +67,109 @@ def _segments_to_srt(segments: List[SubtitleLine]) -> str:
63
  return "\n".join(lines)
64
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def transcribe_audio(
67
  audio_path: str,
68
  library: str = "faster_whisper",
69
  api_key: Optional[str] = None,
70
  model_size: str = "base",
71
  words_per_sub: int = 7,
72
- ) -> str:
73
- """Transcribe *audio_path* and return SRT content."""
74
  logging.debug(f"Starting transcription with library: {library}, audio_path: {audio_path}")
75
 
 
76
  if library == "OpenAI Whisper":
77
  if api_key is None:
78
  raise ValueError("api_key is required for OpenAI Whisper")
79
  import openai
80
 
81
  openai.api_key = api_key
82
- logging.debug("Calling OpenAI Whisper API...")
83
- with open(audio_path, "rb") as audio_file:
84
- result = openai.Audio.transcribe(
85
- model="whisper-1",
86
- file=audio_file,
87
- response_format="json",
88
- )
89
- logging.debug(f"OpenAI API response: {result}")
90
- words = result.get("text", "").split()
91
- if not words:
92
- logging.error("No text returned by OpenAI Whisper API.")
93
- raise ValueError("No text returned by OpenAI Whisper API.")
94
- segments = []
95
- start = 0.0
96
- step = 3.0
97
- for i in range(0, len(words), words_per_sub):
98
- end = start + step
99
- text = " ".join(words[i : i + words_per_sub])
100
- segments.append(SubtitleLine(start=start, end=end, text=text))
101
- start = end
102
- logging.debug(f"Generated segments: {segments}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  else:
104
  if WhisperModel is None:
105
  raise RuntimeError("faster_whisper is not installed")
@@ -107,6 +177,7 @@ def transcribe_audio(
107
  model = WhisperModel(model_size)
108
  segs = model.transcribe(audio_path)[0]
109
  segments = [SubtitleLine(start=s.start, end=s.end, text=s.text) for s in segs]
 
110
  logging.debug(f"Generated segments: {segments}")
111
 
112
  if not segments:
@@ -115,7 +186,7 @@ def transcribe_audio(
115
 
116
  srt_content = _segments_to_srt(segments)
117
  logging.debug(f"Generated SRT content: {srt_content}")
118
- return srt_content
119
 
120
 
121
  def save_srt(content: str, output_path: str) -> str:
@@ -124,6 +195,12 @@ def save_srt(content: str, output_path: str) -> str:
124
  return output_path
125
 
126
 
 
 
 
 
 
 
127
  def merge_subtitles(video_path: str, srt_path: str, output_path: str) -> str:
128
  command = [
129
  "ffmpeg",
 
8
  from dataclasses import dataclass
9
  from typing import List, Optional
10
 
11
+ from pydub import AudioSegment
12
+
13
  # MoviePy is an optional dependency used when extracting audio. It is imported
14
  # lazily to avoid issues when running in environments where it is not
15
  # available (for instance during unit tests).
 
21
 
22
  logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
23
 
24
+ MAX_OPENAI_AUDIO_SIZE = 25 * 1024 * 1024 # 25 MB
25
+
26
 
27
  def format_timestamp(seconds: float) -> str:
28
  """Return timestamp in SRT format."""
 
67
  return "\n".join(lines)
68
 
69
 
70
def _export_and_transcribe_segment(seg, idx, audio_path, openai, words_per_sub, time_offset):
    """Export *seg* to a temporary MP3 and transcribe it via OpenAI Whisper.

    If the exported MP3 is still larger than MAX_OPENAI_AUDIO_SIZE, the
    segment is halved and each half is processed recursively, so any input
    eventually fits under the API upload limit.

    Args:
        seg: pydub AudioSegment to transcribe.
        idx: label used in the temp-file suffix and log messages.
        audio_path: path of the source audio (unused here; kept for
            signature compatibility with existing callers).
        openai: the imported ``openai`` module, already configured with a key.
        words_per_sub: number of words per generated subtitle line.
        time_offset: seconds to shift this segment's subtitle timestamps by.

    Returns:
        Tuple ``(segments, texts)``: a list of SubtitleLine objects and a
        list of plain-text transcription chunks.
    """
    import tempfile

    segment_list = []
    txt_list = []
    # delete=False keeps the file on disk after close; we remove it ourselves.
    # Closing the handle immediately also makes export/reopen/remove work on
    # Windows, where an open NamedTemporaryFile cannot be reopened or deleted.
    temp_file = tempfile.NamedTemporaryFile(suffix=f"_part{idx}.mp3", delete=False)
    temp_file.close()
    try:
        seg.export(temp_file.name, format="mp3")
        temp_size = os.path.getsize(temp_file.name)
        logging.debug(f"Segmento {idx}: dimensione {temp_size} byte (MP3)")
        if temp_size > MAX_OPENAI_AUDIO_SIZE:
            # Still over the API limit: split in half and recurse on each part.
            logging.info(f"Segmento {idx} ancora troppo grande, suddivisione ricorsiva...")
            mid = len(seg) // 2  # pydub lengths are in milliseconds
            seg1, seg2 = seg[:mid], seg[mid:]
            segs1, txts1 = _export_and_transcribe_segment(
                seg1, f"{idx}a", audio_path, openai, words_per_sub, time_offset
            )
            segs2, txts2 = _export_and_transcribe_segment(
                seg2, f"{idx}b", audio_path, openai, words_per_sub,
                time_offset + seg1.duration_seconds,
            )
            segment_list.extend(segs1)
            segment_list.extend(segs2)
            txt_list.extend(txts1)
            txt_list.extend(txts2)
        else:
            with open(temp_file.name, "rb") as audio_file:
                result = openai.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="json",
                )
            txt_list.append(result.text.strip())
            # The json response carries no word timings, so rebuild fixed
            # 3-second subtitle windows, shifted by *time_offset*.
            words = result.text.split()
            start = time_offset
            step = 3.0
            for i in range(0, len(words), words_per_sub):
                end = start + step
                text = " ".join(words[i : i + words_per_sub])
                segment_list.append(SubtitleLine(start=start, end=end, text=text))
                start = end
    finally:
        # Fixes a temp-file leak: the original removed the MP3 only after a
        # successful transcription, so export/API errors left files behind.
        os.remove(temp_file.name)
    return segment_list, txt_list
115
+
116
+
117
  def transcribe_audio(
118
  audio_path: str,
119
  library: str = "faster_whisper",
120
  api_key: Optional[str] = None,
121
  model_size: str = "base",
122
  words_per_sub: int = 7,
123
+ ) -> tuple[str, str]:
124
+ """Transcribe *audio_path* and return (SRT content, plain text content)."""
125
  logging.debug(f"Starting transcription with library: {library}, audio_path: {audio_path}")
126
 
127
+ plain_text = None
128
  if library == "OpenAI Whisper":
129
  if api_key is None:
130
  raise ValueError("api_key is required for OpenAI Whisper")
131
  import openai
132
 
133
  openai.api_key = api_key
134
+ # --- Gestione file troppo grandi ---
135
+ if os.path.getsize(audio_path) > MAX_OPENAI_AUDIO_SIZE:
136
+ logging.info("Audio troppo grande, suddivisione in segmenti...")
137
+ audio = AudioSegment.from_file(audio_path)
138
+ duration_ms = len(audio)
139
+ segment_length_ms = 20 * 60 * 1000
140
+ segments = [audio[i : i + segment_length_ms] for i in range(0, duration_ms, segment_length_ms)]
141
+ srt_parts = []
142
+ txt_parts = []
143
+ time_offset = 0.0
144
+ for idx, seg in enumerate(segments):
145
+ segs, txts = _export_and_transcribe_segment(seg, idx, audio_path, openai, words_per_sub, time_offset)
146
+ srt_parts.extend(segs)
147
+ txt_parts.extend(txts)
148
+ time_offset += seg.duration_seconds
149
+ segments = srt_parts
150
+ plain_text = " ".join(txt_parts)
151
+ else:
152
+ with open(audio_path, "rb") as audio_file:
153
+ result = openai.audio.transcriptions.create(
154
+ model="whisper-1",
155
+ file=audio_file,
156
+ response_format="json",
157
+ )
158
+ logging.debug(f"OpenAI API response: {result}")
159
+ words = result.text.split()
160
+ plain_text = result.text.strip()
161
+ if not words:
162
+ logging.error("No text returned by OpenAI Whisper API.")
163
+ raise ValueError("No text returned by OpenAI Whisper API.")
164
+ segments = []
165
+ start = 0.0
166
+ step = 3.0
167
+ for i in range(0, len(words), words_per_sub):
168
+ end = start + step
169
+ text = " ".join(words[i : i + words_per_sub])
170
+ segments.append(SubtitleLine(start=start, end=end, text=text))
171
+ start = end
172
+ logging.debug(f"Generated segments: {segments}")
173
  else:
174
  if WhisperModel is None:
175
  raise RuntimeError("faster_whisper is not installed")
 
177
  model = WhisperModel(model_size)
178
  segs = model.transcribe(audio_path)[0]
179
  segments = [SubtitleLine(start=s.start, end=s.end, text=s.text) for s in segs]
180
+ plain_text = " ".join([s.text.strip() for s in segments])
181
  logging.debug(f"Generated segments: {segments}")
182
 
183
  if not segments:
 
186
 
187
  srt_content = _segments_to_srt(segments)
188
  logging.debug(f"Generated SRT content: {srt_content}")
189
+ return srt_content, plain_text
190
 
191
 
192
  def save_srt(content: str, output_path: str) -> str:
 
195
  return output_path
196
 
197
 
198
def save_txt(content: str, output_path: str) -> str:
    """Write *content* to *output_path* as UTF-8 text and return the path."""
    with open(output_path, mode="w", encoding="utf-8") as handle:
        handle.write(content)
    return output_path
202
+
203
+
204
  def merge_subtitles(video_path: str, srt_path: str, output_path: str) -> str:
205
  command = [
206
  "ffmpeg",