Spaces:

danzapp70
/

myTools

Sleeping

App Files Files Community

danzapp70 committed on Jun 19, 2025

Commit

0d6f640

verified ·

1 Parent(s): d75acb1

Deploy version v1.1.0

Browse files

Files changed (9) hide show

Documentation.md +0 -0
README.md +5 -2
app.py +198 -62
manifest.json +1 -1
requirements.txt +1 -1
src/__pycache__/subtitle_extractor.cpython-313.pyc +0 -0
src/main.py +35 -0
src/subtitle_extractor.py +143 -0
tests/test_subtitle_extractor.py +12 -0

Documentation.md ADDED Viewed

File without changes

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 title: MyTools
-emoji: 🌍
 colorFrom: blue
 colorTo: indigo
 sdk: gradio
@@ -9,4 +9,7 @@ app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: MyTools
+emoji: "🌍"
 colorFrom: blue
 colorTo: indigo
 sdk: gradio
 pinned: false
 ---
+This repository contains a simple tool to extract subtitles from a video file.
+It provides a minimal CLI in `src/main.py` and a Gradio interface defined in `app.py`.
+Refer to the [Hugging Face Spaces documentation](https://huggingface.co/docs/hub/spaces-config-reference) for configuration options.

app.py CHANGED Viewed

@@ -3,7 +3,8 @@ import os
 import json
 import logging
 from moviepy.editor import VideoFileClip, AudioFileClip
-import openai
 import time
 import shutil
 import subprocess
@@ -22,7 +23,7 @@ except ImportError:
     WhisperModel = None
     logging.warning("Libreria 'faster_whisper' non trovata. La funzionalità sarà disabilitata.")
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logging.info(f"Directory temporanea creata: {TEMP_DIR}")
 stop_requested = False
@@ -78,44 +79,87 @@ def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True))
         gr.Error(f"Errore ffmpeg: {e}"); return None, None
 def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, current_history):
-    start_time = time.time(); global stop_requested
-    if stop_requested: return current_history, gr.update(interactive=True), None
-    audio_source_for_transcription = ""
     if edited_audio_path and os.path.exists(edited_audio_path):
-        gr.Info("Uso l'audio modificato per la trascrizione.")
-        audio_source_for_transcription = edited_audio_path
     elif video_path and os.path.exists(video_path):
-        gr.Info("Estraggo l'audio dal video originale per la trascrizione...")
         try:
             video = VideoFileClip(video_path)
-            audio_source_for_transcription = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
-            video.audio.write_audiofile(audio_source_for_transcription, logger=None)
         except Exception as e:
-            gr.Error(f"Errore estrazione audio: {e}"); return current_history, gr.update(interactive=True), None
     else:
-        gr.Error("Nessuna sorgente video o audio valida."); return current_history, gr.update(interactive=True), None
-    # Logica di trascrizione effettiva
-    # (Ometto il corpo delle funzioni transcribe_video e transcribe_with_openai_whisper per brevità,
-    # ma la logica sottostante è la stessa delle versioni precedenti)
-    # Simuliamo il risultato per mantenere la struttura
-    srt_filename = os.path.join(TEMP_DIR, "placeholder.srt")
-    with open(srt_filename, "w") as f: f.write("1\n00:00:01,000 --> 00:00:02,000\nTest\n\n")
-    library_used = library
-    cost = "$0.00"
-    success_msg = "Trascrizione completata"
-    if os.path.exists(audio_source_for_transcription) and "temp_transcribe_audio" in audio_source_for_transcription:
-        os.remove(audio_source_for_transcription)
-    gr.Info("Trascrizione completata.")
     elapsed_time = time.time() - start_time
-    new_entry = {"File SRT": os.path.basename(srt_filename), "Libreria": library_used, "Tempo Impiegato (s)": f"{elapsed_time:.2f}", "Costo": cost, "Orario Generazione": datetime.now().strftime("%H:%M:%S"), "Orario Unione": "", "Percorso Completo": srt_filename, "Video Unito": None}
-    updated_history = [entry for entry in current_history if entry["File SRT"] != os.path.basename(srt_filename)]
     updated_history.append(new_entry)
-    return updated_history, gr.update(interactive=False), success_msg
 # ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
@@ -127,11 +171,21 @@ def save_srt_changes(srt_path, new_content):
     except Exception as e: gr.Error(f"Errore salvataggio: {e}")
 def show_srt_for_editing(srt_path):
     if not srt_path or not os.path.exists(srt_path):
-        gr.Warning("Nessun SRT selezionato."); return None, gr.update(visible=False)
-    with open(srt_path, 'r', encoding='utf-8') as f: content = f.read()
-    return content, gr.update(visible=True, open=True)
 js_loader_script = "function startLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='block',window.loaderInterval&&clearInterval(window.loaderInterval),document.getElementById('timer').innerText='0s',window.loaderInterval=setInterval(()=>{document.getElementById('timer').innerText=parseInt(document.getElementById('timer').innerText)+1+'s'},1e3))}function stopLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='none',window.loaderInterval&&clearInterval(window.loaderInterval))}"
@@ -169,17 +223,7 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
             gr.Markdown("### 3. Anteprima ed Editor")
             video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
             with gr.Group(visible=False) as audio_editor_group:
-                audio_output = gr.Audio(
-                    label="Editor Traccia Audio",
-                    type="filepath",
-                    editable=True,            # abilita il trim
-                    interactive=True,         # mostra la waveform e gli handle
-                    waveform_options={        # (opzionale) personalizza l’aspetto
-                        "show_controls": True,
-                        "skip_length": 1,              # tasti +1s / –1s
-                        "trim_region_color": "#1976d2" # colore della selezione
-                    }
-                )
                 undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
             final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False)
@@ -191,7 +235,7 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
             merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
             delete_btn = gr.Button("🗑️ Elimina", variant="stop")
         with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
-            srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True)
             save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
     # --- FUNZIONI HELPER E LOGICA EVENTI ---
@@ -205,16 +249,39 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
         return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False)
     def on_select_srt(history_data, evt: gr.SelectData):
-        if evt.index is None: return None, gr.update(visible=False), gr.update(visible=False), None
         selected_entry = history_data[evt.index[0]]
-        return selected_entry["Percorso Completo"], gr.update(visible=True), gr.update(visible=False), selected_entry.get("Video Unito")
     def update_dataframe(history_list):
-        if not history_list: return pd.DataFrame(columns=["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"])
         display_list = []
         for entry in history_list:
-            display_entry = entry.copy(); display_entry["Video Unito"] = "✔️" if entry.get("Video Unito") else ""; display_list.append(display_entry)
-        return pd.DataFrame(display_list)[["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"]]
     def delete_selected(history_data, srt_path_to_delete):
         if not srt_path_to_delete: gr.Warning("Nessun file selezionato."); return history_data, gr.update(visible=False)
@@ -235,15 +302,84 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
     # --- CABLAGGIO EVENTI ---
     video_input.upload(fn=show_main_controls, inputs=video_input, outputs=[video_preview, main_panel, submit_btn])
-    extract_audio_btn.click(fn=extract_audio_only, inputs=[video_input], outputs=[audio_output, original_audio_path_state, audio_editor_group])
-    undo_audio_btn.click(fn=lambda path: path, inputs=[original_audio_path_state], outputs=[audio_output])
-    # (Lascio qui il resto del cablaggio eventi per completezza)
-    # ...
-    # submit_event = submit_btn.click(...)
-    # ...
 if __name__ == "__main__":
-    demo.queue().launch(share=True)

 import json
 import logging
 from moviepy.editor import VideoFileClip, AudioFileClip
+from src.subtitle_extractor import transcribe_audio, save_srt
 import time
 import shutil
 import subprocess
     WhisperModel = None
     logging.warning("Libreria 'faster_whisper' non trovata. La funzionalità sarà disabilitata.")
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 logging.info(f"Directory temporanea creata: {TEMP_DIR}")
 stop_requested = False
         gr.Error(f"Errore ffmpeg: {e}"); return None, None
 def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, current_history):
+    logging.debug("Starting transcription process...")
+    start_time = time.time()
+    global stop_requested
+    if stop_requested:
+        logging.warning("Transcription stopped by user.")
+        return current_history, gr.update(interactive=True), update_dataframe(current_history)
+    audio_source = None
     if edited_audio_path and os.path.exists(edited_audio_path):
+        logging.info("Using edited audio for transcription.")
+        audio_source = edited_audio_path
     elif video_path and os.path.exists(video_path):
+        logging.info("Extracting audio from original video for transcription...")
         try:
             video = VideoFileClip(video_path)
+            audio_source = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
+            video.audio.write_audiofile(audio_source, logger=None)
+            logging.info(f"Audio extracted to: {audio_source}")
         except Exception as e:
+            logging.error(f"Error extracting audio: {e}")
+            return current_history, gr.update(interactive=True), update_dataframe(current_history)
     else:
+        logging.error("No valid video or audio source provided.")
+        return current_history, gr.update(interactive=True), update_dataframe(current_history)
+    try:
+        if library == "OpenAI Whisper":
+            if not api_key:
+                logging.error("Missing OpenAI API Key.")
+                gr.Error("API Key OpenAI mancante.")
+                return current_history, gr.update(interactive=True), update_dataframe(current_history)
+            logging.info("Using OpenAI Whisper for transcription.")
+            srt_content = transcribe_audio(
+                audio_source,
+                library="OpenAI Whisper",
+                api_key=api_key,
+                words_per_sub=int(words_per_sub),
+            )
+        else:
+            logging.info("Using Faster Whisper for transcription.")
+            srt_content = transcribe_audio(
+                audio_source,
+                library="faster_whisper",
+                api_key=None,
+                words_per_sub=int(words_per_sub),
+            )
+        logging.debug("Transcription completed successfully.")
+    except Exception as e:
+        logging.error(f"Error during transcription: {e}")
+        gr.Error(f"Errore trascrizione: {e}")
+        return current_history, gr.update(interactive=True), update_dataframe(current_history)
+    base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
+    srt_filename = os.path.join(TEMP_DIR, f"{base_name}.srt")
+    try:
+        save_srt(srt_content, srt_filename)
+        logging.info(f"SRT file saved successfully at: {srt_filename}")
+    except Exception as e:
+        logging.error(f"Error saving SRT file: {e}")
+        return current_history, gr.update(interactive=True), update_dataframe(current_history)
+    if audio_source.startswith(TEMP_DIR) and os.path.basename(audio_source) == "temp_transcribe_audio.wav":
+        os.remove(audio_source)
+        logging.info("Temporary audio file removed.")
     elapsed_time = time.time() - start_time
+    new_entry = {
+        "File SRT": os.path.basename(srt_filename),
+        "Libreria": library,
+        "Tempo Impiegato (s)": f"{elapsed_time:.2f}",
+        "Percorso Completo": srt_filename,
+        "Video Unito": None,
+        "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
+        "Orario Unione": "",
+    }
+    logging.debug(f"Adding new entry to history: {new_entry}")
+    updated_history = [e for e in current_history if e["File SRT"] != os.path.basename(srt_filename)]
     updated_history.append(new_entry)
+    logging.debug(f"Updated history: {updated_history}")
+    return updated_history, gr.update(interactive=False), update_dataframe(updated_history)
 # ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
     except Exception as e: gr.Error(f"Errore salvataggio: {e}")
 def show_srt_for_editing(srt_path):
+    logging.info(f"show_srt_for_editing triggered with srt_path: {srt_path}")
     if not srt_path or not os.path.exists(srt_path):
+        logging.warning("Percorso SRT non valido o file inesistente.")
+        return gr.update(value=None, visible=False)
+    try:
+        # Legge il contenuto del file SRT
+        with open(srt_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+        logging.info("Contenuto del file SRT caricato con successo.")
+        # Rende visibile il box di modifica con il contenuto del file
+        return gr.update(value=content, visible=True)
+    except Exception as e:
+        logging.error(f"Errore durante la lettura del file SRT: {e}")
+        return gr.update(value=None, visible=False)
 js_loader_script = "function startLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='block',window.loaderInterval&&clearInterval(window.loaderInterval),document.getElementById('timer').innerText='0s',window.loaderInterval=setInterval(()=>{document.getElementById('timer').innerText=parseInt(document.getElementById('timer').innerText)+1+'s'},1e3))}function stopLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='none',window.loaderInterval&&clearInterval(window.loaderInterval))}"
             gr.Markdown("### 3. Anteprima ed Editor")
             video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
             with gr.Group(visible=False) as audio_editor_group:
+                audio_output = gr.Audio(label="Editor Traccia Audio", editable=True, type="filepath")
                 undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
             final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False)
             merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
             delete_btn = gr.Button("🗑️ Elimina", variant="stop")
         with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
+            srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
             save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
     # --- FUNZIONI HELPER E LOGICA EVENTI ---
         return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False)
     def on_select_srt(history_data, evt: gr.SelectData):
         """Handle a row selection in the subtitle history table.

         Returns exactly three values, one per output wired to
         ``history_df.select``: the selected SRT path, an update for the
         action buttons and an update for the SRT editor accordion.

         Args:
             history_data: List of history entries (dicts); the selected
                 entry's "Percorso Completo" key holds the SRT path.
             evt: Gradio selection event; ``evt.index[0]`` is used as the
                 row index into ``history_data``.

         Returns:
             ``(srt_path, action_buttons_update, editor_update)``. When
             nothing valid is selected the path is ``None`` and both
             components are hidden.
         """
         # Every exit path must return the same number of values as there are
         # outputs; the previous early returns produced four values for three
         # outputs.
         cleared = (None, gr.update(visible=False), gr.update(visible=False))
         if evt.index is None or not history_data:
             return cleared
         row = evt.index[0]
         # Guard against a stale selection pointing past the current history.
         if not 0 <= row < len(history_data):
             return cleared
         srt_path = history_data[row]["Percorso Completo"]
         # The file may have been deleted after the entry was recorded.
         if not srt_path or not os.path.exists(srt_path):
             gr.Warning("Il file SRT selezionato non esiste.")
             return cleared
         # Show the action buttons; the editor stays hidden until requested.
         return srt_path, gr.update(visible=True), gr.update(visible=False)
     def update_dataframe(history_list):
+        if not history_list:
+            logging.debug("History list is empty. Returning empty dataframe.")
+            return pd.DataFrame(columns=["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"])
         display_list = []
         for entry in history_list:
+            display_entry = entry.copy()
+            display_entry["Video Unito"] = "✔️" if entry.get("Video Unito") else ""
+            display_list.append(display_entry)
+        logging.debug(f"Updated dataframe with entries: {display_list}")
+        return pd.DataFrame(display_list)[
+            ["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"]
+        ]
     def delete_selected(history_data, srt_path_to_delete):
         if not srt_path_to_delete: gr.Warning("Nessun file selezionato."); return history_data, gr.update(visible=False)
     # --- CABLAGGIO EVENTI ---
     video_input.upload(fn=show_main_controls, inputs=video_input, outputs=[video_preview, main_panel, submit_btn])
+    extract_audio_btn.click(
+        fn=extract_audio_only,
+        inputs=[video_input],
+        outputs=[audio_output, original_audio_path_state, audio_editor_group],
+    )
+    undo_audio_btn.click(
+        fn=lambda path: path,
+        inputs=[original_audio_path_state],
+        outputs=[audio_output],
+    )
+    library_selector.change(
+        lambda lib: gr.update(visible=lib == "OpenAI Whisper"),
+        inputs=library_selector,
+        outputs=openai_options,
+    )
+    submit_btn.click(
+        fn=transcribe,
+        inputs=[
+            video_input,
+            audio_output,
+            library_selector,
+            api_key_input,
+            words_slider,
+            srt_history_state,
+        ],
+        outputs=[srt_history_state, submit_btn, history_df],
+    )
+    # Aggiorna il cablaggio eventi per history_df
+    history_df.select(
+        fn=on_select_srt,
+        inputs=[srt_history_state],
+        outputs=[
+            selected_srt_path_state,  # Percorso del file SRT selezionato
+            action_buttons,          # Rende visibili i pulsanti delle azioni
+            srt_editor_accordion     # Nasconde il box di modifica inizialmente
+        ]
+    )
+    # Aggiorna il cablaggio eventi per edit_btn
+    edit_btn.click(
+        fn=show_srt_for_editing,
+        inputs=[selected_srt_path_state],
+        outputs=[srt_editor_box]  # Aggiorna solo il contenuto del box
+    )
+    # Rende visibile il box di modifica quando viene cliccato il pulsante
+    edit_btn.click(
+        fn=lambda: gr.update(visible=True),
+        inputs=[],
+        outputs=[srt_editor_accordion]  # Rende visibile l'accordion
+    )
+    # Aggiorna il cablaggio eventi per merge_btn
+    merge_btn.click(
+        fn=merge_subtitles,
+        inputs=[video_input, selected_srt_path_state],
+        outputs=[final_video]  # Aggiorna solo il contenuto del video finale
+    )
+    # Rende visibile il video finale quando viene cliccato il pulsante
+    merge_btn.click(
+        fn=lambda: gr.update(visible=True),
+        inputs=[],
+        outputs=[final_video]  # Rende visibile il componente del video finale
+    )
+    # Aggiorna il cablaggio eventi per delete_btn
+    delete_btn.click(
+        fn=delete_selected,
+        inputs=[srt_history_state, selected_srt_path_state],
+        outputs=[srt_history_state, action_buttons]  # Update history and hide action buttons
+    )
 if __name__ == "__main__":
+    demo.queue().launch()  # Rimosso `share=True` per eseguire l'app localmente

manifest.json CHANGED Viewed

@@ -1,3 +1,3 @@
 {
-  "version": "1.0.0"
 }

 {
+  "version": "1.1.0"
 }

requirements.txt CHANGED Viewed

@@ -3,4 +3,4 @@ pandas
 faster-whisper
 moviepy==1.0.3
 openai
-ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess

 faster-whisper
 moviepy==1.0.3
 openai
+ffmpeg-python  # Aggiunto per robustezza, anche se usiamo subprocess

src/__pycache__/subtitle_extractor.cpython-313.pyc ADDED Viewed

Binary file (7.08 kB). View file

src/main.py CHANGED Viewed

	@@ -0,0 +1,35 @@

+"""Simple CLI interface for the subtitle extractor."""
+import argparse
+import os
+import tempfile
+from .subtitle_extractor import (
+    extract_audio,
+    transcribe_audio,
+    save_srt,
+    merge_subtitles,
+)
def main() -> None:
    """Parse command-line arguments and run the subtitle pipeline.

    The audio track of the given video is extracted into the output
    directory, transcribed with the selected library and saved as
    ``subtitles.srt`` there. With ``--merge`` the subtitles are also merged
    with the video into ``merged.mp4``.

    Invalid invocations (OpenAI Whisper without an API key, or a video path
    that is not a file) are rejected through ``parser.error`` -- usage
    message on stderr, exit status 2 -- before any work is started.
    """
    parser = argparse.ArgumentParser(description="Generate subtitles from a video")
    parser.add_argument("video", help="Path to the video file")
    parser.add_argument("--library", choices=["faster_whisper", "OpenAI Whisper"], default="faster_whisper")
    parser.add_argument("--api-key", help="OpenAI API key if using OpenAI Whisper")
    parser.add_argument("--output", help="Output directory", default="output")
    parser.add_argument("--merge", action="store_true", help="Merge subtitles with video")
    args = parser.parse_args()

    # Fail fast: without these checks the user only saw a traceback, and for
    # the API key only after the audio had already been extracted.
    if args.library == "OpenAI Whisper" and not args.api_key:
        parser.error("--api-key is required when --library is 'OpenAI Whisper'")
    if not os.path.isfile(args.video):
        parser.error(f"video file not found: {args.video}")

    audio_path = extract_audio(args.video, args.output)
    srt_content = transcribe_audio(audio_path, library=args.library, api_key=args.api_key)
    srt_path = save_srt(srt_content, os.path.join(args.output, "subtitles.srt"))
    print(f"Generated subtitles: {srt_path}")

    if args.merge:
        merged = merge_subtitles(args.video, srt_path, os.path.join(args.output, "merged.mp4"))
        print(f"Merged video saved to: {merged}")
+if __name__ == "__main__":
+    main()

src/subtitle_extractor.py CHANGED Viewed

	@@ -0,0 +1,143 @@

+"""Utility functions for extracting audio, transcribing and merging subtitles."""
+from __future__ import annotations
+import logging
+import os
+import subprocess
+from dataclasses import dataclass
+from typing import List, Optional
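+# MoviePy is an optional dependency used when extracting audio. It is imported
+# lazily inside extract_audio() to avoid issues when running in environments
+# where it is not available (for instance during unit tests).
+# faster_whisper is optional as well, which is why its import below is guarded.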
+try:
+    from faster_whisper import WhisperModel
+except ImportError:  # pragma: no cover - optional dependency
+    WhisperModel = None
+logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
def format_timestamp(seconds: float) -> str:
    """Return *seconds* formatted as an SRT timestamp (``HH:MM:SS,mmm``).

    The value is converted to a whole number of milliseconds once, with
    rounding, and the fields are derived by integer division. Truncating the
    fractional part of a float instead loses a millisecond for values such
    as ``2.3`` (stored as 2.2999...), and cannot carry into the seconds
    field when the fraction rounds up to a full second.

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero, since they would otherwise produce a
            malformed timestamp.

    Returns:
        The formatted timestamp, e.g. ``"01:01:01,500"``.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"
def extract_audio(video_path: str, output_dir: str) -> str:
    """Extract the audio track of *video_path* into *output_dir* as WAV.

    Args:
        video_path: Path to an existing video file.
        output_dir: Directory for the audio file; created if missing.

    Returns:
        Path of the written file, ``<output_dir>/<video base name>.wav``.

    Raises:
        FileNotFoundError: If *video_path* does not exist.
        ValueError: If the video has no audio track.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(video_path)
    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    audio_path = os.path.join(output_dir, f"{base_name}.wav")

    # Import here so tests that do not require MoviePy can run without the
    # dependency installed.
    from moviepy.editor import VideoFileClip

    clip = VideoFileClip(video_path)
    try:
        # MoviePy exposes ``audio`` as None for silent videos; report that
        # explicitly instead of failing with an AttributeError.
        if clip.audio is None:
            raise ValueError(f"No audio track found in {video_path}")
        clip.audio.write_audiofile(audio_path, logger=None)
    finally:
        # Release the clip even when writing fails.
        clip.close()
    return audio_path
@dataclass
class SubtitleLine:
    """One subtitle cue: a span of time and the text shown during it."""

    # Cue boundaries. They are handed to format_timestamp(), which treats
    # its argument as seconds.
    start: float
    end: float
    # Caption text; surrounding whitespace is stripped when rendered to SRT.
    text: str
def _segments_to_srt(segments: List[SubtitleLine]) -> str:
    """Render *segments* as SRT text.

    Cues are numbered from 1 in list order. Each cue consists of its number,
    a ``start --> end`` timing line and the stripped text, each terminated
    by a newline; consecutive cues are separated by one blank line. An empty
    list yields an empty string.
    """
    cues = (
        f"{number}\n"
        f"{format_timestamp(cue.start)} --> {format_timestamp(cue.end)}\n"
        f"{cue.text.strip()}\n"
        for number, cue in enumerate(segments, start=1)
    )
    return "\n".join(cues)
def transcribe_audio(
    audio_path: str,
    library: str = "faster_whisper",
    api_key: Optional[str] = None,
    model_size: str = "base",
    words_per_sub: int = 7,
) -> str:
    """Transcribe *audio_path* and return the subtitles as SRT text.

    Args:
        audio_path: Path of the audio file to transcribe.
        library: ``"OpenAI Whisper"`` selects the OpenAI API; any other
            value selects the local faster_whisper model.
        api_key: OpenAI API key; required (non-empty) for OpenAI Whisper.
        model_size: faster_whisper model name; unused by the OpenAI branch.
        words_per_sub: Number of words per cue in the OpenAI branch; must be
            at least 1. The faster_whisper branch uses the model's own
            segmentation and ignores it.

    Returns:
        The transcription in SRT format.

    Raises:
        ValueError: If the API key is missing, *words_per_sub* is not
            positive, or the transcription produced no text/segments.
        RuntimeError: If faster_whisper is selected but not installed.
    """
    logging.debug(f"Starting transcription with library: {library}, audio_path: {audio_path}")
    if library == "OpenAI Whisper":
        # Validate locally before importing the SDK or paying for a request.
        if not api_key:
            raise ValueError("api_key is required for OpenAI Whisper")
        if words_per_sub < 1:
            raise ValueError("words_per_sub must be a positive integer")

        import openai

        logging.debug("Calling OpenAI Whisper API...")
        with open(audio_path, "rb") as audio_file:
            if hasattr(openai, "OpenAI"):
                # openai>=1.0 removed the module-level ``openai.Audio`` API;
                # requests go through a client object instead.
                client = openai.OpenAI(api_key=api_key)
                result = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="json",
                )
            else:
                # Legacy SDK (<1.0).
                openai.api_key = api_key
                result = openai.Audio.transcribe(
                    model="whisper-1",
                    file=audio_file,
                    response_format="json",
                )
        logging.debug(f"OpenAI API response: {result}")

        # The new SDK returns an object with a ``text`` attribute, the
        # legacy one a dict-like response.
        text = getattr(result, "text", None)
        if text is None and isinstance(result, dict):
            text = result.get("text")
        words = (text or "").split()
        if not words:
            logging.error("No text returned by OpenAI Whisper API.")
            raise ValueError("No text returned by OpenAI Whisper API.")

        # The plain JSON response carries no timings, so every cue is given
        # a nominal 3-second slot: these timestamps are synthetic.
        step = 3.0
        segments = [
            SubtitleLine(
                start=slot * step,
                end=(slot + 1) * step,
                text=" ".join(words[first : first + words_per_sub]),
            )
            for slot, first in enumerate(range(0, len(words), words_per_sub))
        ]
        logging.debug(f"Generated segments: {segments}")
    else:
        if WhisperModel is None:
            raise RuntimeError("faster_whisper is not installed")
        logging.debug("Using Faster Whisper for transcription...")
        model = WhisperModel(model_size)
        # transcribe() returns a sequence whose first item is the segments.
        segs = model.transcribe(audio_path)[0]
        segments = [SubtitleLine(start=s.start, end=s.end, text=s.text) for s in segs]
        logging.debug(f"Generated segments: {segments}")

    if not segments:
        logging.error("No segments generated during transcription.")
        raise ValueError("No segments generated during transcription.")

    srt_content = _segments_to_srt(segments)
    logging.debug(f"Generated SRT content: {srt_content}")
    return srt_content
def save_srt(content: str, output_path: str) -> str:
    """Write *content* to *output_path* as UTF-8 and return the path.

    Missing parent directories are created first, so the function can be
    called without a prior step having prepared the output directory.

    Args:
        content: SRT text to write.
        output_path: Destination file; overwritten if it already exists.

    Returns:
        *output_path*, unchanged.
    """
    parent = os.path.dirname(output_path)
    # dirname is empty for a bare file name; makedirs("") would raise.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)
    return output_path
def merge_subtitles(video_path: str, srt_path: str, output_path: str) -> str:
    """Render the subtitles in *srt_path* onto *video_path* using ffmpeg.

    The video is re-encoded with libx264 through ffmpeg's ``subtitles``
    filter while the audio stream is copied; an existing *output_path* is
    overwritten (``-y``).

    Args:
        video_path: Source video file.
        srt_path: Subtitle file passed to the ``subtitles`` filter.
        output_path: Destination video file.

    Returns:
        *output_path*, unchanged.

    Raises:
        FileNotFoundError: If *video_path* or *srt_path* does not exist (or
            if the ``ffmpeg`` executable cannot be found).
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    # Check inputs up front, as extract_audio() does, instead of surfacing a
    # missing file as an opaque ffmpeg failure.
    for required in (video_path, srt_path):
        if not os.path.exists(required):
            raise FileNotFoundError(required)

    # NOTE(review): srt_path is interpolated unescaped into the filter
    # description; paths containing ':', '\\', ',' or quotes (for example
    # Windows drive paths) presumably need ffmpeg filtergraph escaping --
    # confirm on the target platforms.
    command = [
        "ffmpeg",
        "-y",
        "-i",
        video_path,
        "-vf",
        f"subtitles={srt_path}",
        "-c:a",
        "copy",
        "-c:v",
        "libx264",
        output_path,
    ]
    subprocess.run(command, check=True)
    return output_path

tests/test_subtitle_extractor.py CHANGED Viewed

	@@ -0,0 +1,12 @@

+import os
+import sys
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+from src.subtitle_extractor import format_timestamp
def test_format_timestamp():
    """format_timestamp renders zero, fractional and multi-hour offsets."""
    expected_by_input = {
        0: "00:00:00,000",
        1.234: "00:00:01,234",
        3661.5: "01:01:01,500",
    }
    for seconds, expected in expected_by_input.items():
        assert format_timestamp(seconds) == expected