Spaces:

AxleToe
/

whisper-subtitle-generator

Sleeping

App Files Files Community

Akhil373 committed on Aug 15, 2025

Commit

08b5587

0 Parent(s):

first commit

Browse files

Files changed (6) hide show

.gitignore +3 -0
Dockerfile +12 -0
docker-compose.yml +9 -0
main.py +248 -0
requirements.txt +0 -0
test.py +14 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+audio/*
+.venv
+.idea

Dockerfile ADDED Viewed

	@@ -0,0 +1,12 @@

+# Development image: Python 3.13 plus the ffmpeg binary that main.py shells out to.
+FROM python:3.13-slim
+# Install ffmpeg and remove the apt package lists in the same layer.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Only requirements.txt is copied; no application source is added to the image here.
+COPY requirements.txt .
+RUN pip3 install --no-cache-dir -r requirements.txt
+# Keep the container running without starting the application.
+CMD ["tail", "-f", "/dev/null"]

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,9 @@

+# Single development service built from the Dockerfile in this directory.
+services:
+  app:
+    build: .
+    container_name: subtitle_generator_dev
+    volumes:
+      # Mount the project directory at /app inside the container.
+      - .:/app
+      # Share the host's cached faster-whisper "small" model with the container.
+      # NOTE(review): the host path is specific to user "axle" - adjust per machine.
+      - /home/axle/.cache/huggingface/hub/models--Systran--faster-whisper-small:/root/.cache/huggingface/hub/models--Systran--faster-whisper-small
+    # Allocate a TTY and keep stdin open for interactive use.
+    tty: true
+    stdin_open: true

main.py ADDED Viewed

	@@ -0,0 +1,248 @@

+from faster_whisper import WhisperModel, BatchedInferencePipeline
+import time
+import os
+import yt_dlp
+import subprocess
+import logging
+logging.basicConfig()
+logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
+def acquire_audio_file(AUDIO_FILE, DOWNLOAD_DIR, output_template):
+    if "youtube.com" in AUDIO_FILE:
+        URLS = [AUDIO_FILE]
+        os.makedirs(DOWNLOAD_DIR, exist_ok=True)
+        ydl_opts = {
+            'outtmpl': output_template,
+            'format': 'm4a/bestaudio/best',
+            'postprocessors': [{
+                'key': 'FFmpegExtractAudio',
+                'preferredcodec': 'm4a',
+            }]
+        }
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            try:
+                print(f"Downloading from YouTube: {URLS[0]}")
+                info = ydl.extract_info(URLS[0], download=True)
+                if 'requested_downloads' in info and info['requested_downloads']:
+                    final_filepath = info['requested_downloads'][0]['filepath']
+                elif '_filename' in info:
+                    final_filepath = info['_filename']
+                else:
+                    print("Warning: yt-dlp did not provide a clear filepath. Attempting to construct.")
+                    if 'title' in info and 'ext' in info:
+                        guessed_filename = f"{info['title']}.{info['ext']}"
+                        guessed_path = os.path.join(DOWNLOAD_DIR, guessed_filename)
+                        if os.path.exists(guessed_path):
+                            final_filepath = guessed_path
+                        else:
+                            print(f"Could not determine downloaded file path for {URLS[0]}.")
+                            final_filepath = None
+            except Exception as e:
+                print(f"An error occurred during YouTube download: {e}")
+                final_filepath = None
+            finally:
+                return final_filepath
+    else:
+        potential_path = os.path.join(DOWNLOAD_DIR, AUDIO_FILE)
+        if os.path.exists(potential_path):
+            final_filepath = potential_path
+            print(f"Using local file: {final_filepath}")
+        elif os.path.exists(AUDIO_FILE):
+            final_filepath = AUDIO_FILE
+            print(f"Using local file: {final_filepath}")
+        else:
+            print(f"Local file not found at '{potential_path}' or as '{AUDIO_FILE}'")
+            final_filepath = None
+        return final_filepath
+def create_subtitle_chunks(segments, max_words=8, max_duration=5.0):
+            subtitle_chunks = []
+            for segment in segments:
+                if hasattr(segment, 'words') and segment.words:
+                    current_chunk = []
+                    chunk_start = segment.words[0].start
+                    for i, word in enumerate(segment.words):
+                        current_chunk.append(word.word)
+                        if (len(current_chunk) >= max_words or
+                            word.end - chunk_start >= max_duration):
+                            text = ''.join(current_chunk).strip()
+                            subtitle_chunks.append({
+                                'start': chunk_start,
+                                'end': word.end,
+                                'text': text
+                            })
+                            current_chunk = []
+                            if i + 1 < len(segment.words):
+                                chunk_start = segment.words[i + 1].start
+                    if current_chunk:
+                        text = ''.join(current_chunk).strip()
+                        subtitle_chunks.append({
+                            'start': chunk_start,
+                            'end': segment.words[-1].end,
+                            'text': text
+                        })
+                else:
+                    subtitle_chunks.append({
+                        'start': segment.start,
+                        'end': segment.end,
+                        'text': segment.text
+                    })
+            return subtitle_chunks
+def format_time(seconds):
+    seconds -= 0.2
+    hours = int(seconds // 3600)
+    minutes = int((seconds % 3600) // 60)
+    seconds_remainder = seconds % 60
+    milliseconds = int((seconds_remainder - int(seconds_remainder)) * 1000)
+    return f"{hours:02d}:{minutes:02d}:{int(seconds_remainder):02d},{milliseconds:03d}"
+def add_subtitles(media_path):
+    base, ext = os.path.splitext(os.path.basename(media_path))
+    dir_path = os.path.dirname(media_path)
+    final_output = os.path.join(dir_path, f"{base}_subtitled.mp4")
+    subtitle_file = os.path.join(dir_path, f"{base}.srt")
+    if not os.path.exists(subtitle_file):
+        print(f"Error: Subtitle file not found at {subtitle_file}")
+        return
+    video_formats = ['.mp4', '.webm', '.mpeg']
+    try:
+        if ext.lower() in video_formats:
+            print('Found video file.')
+            temp_output = os.path.join(dir_path, f"{base}_temp.mp4")
+            cmd = ['ffmpeg', '-i', media_path, '-i', subtitle_file, '-c', 'copy', '-c:s', 'mov_text', temp_output, '-y']
+            subprocess.run(cmd, check=True, capture_output=True)
+            if ext.lower() == ".mp4":
+                os.remove(media_path)
+                os.rename(temp_output, media_path)
+            else:
+                os.rename(temp_output, final_output)
+        else:
+            print('Found audio file.')
+            temp_video = os.path.join(dir_path, f"{base}_temp.mp4")
+            cmd1 = ['ffmpeg', '-f', 'lavfi', '-i', 'color=c=black:s=1280x720:r=5',
+                    '-i', media_path, '-c:a', 'copy', '-shortest', temp_video, '-y']
+            subprocess.run(cmd1, check=True, capture_output=True)
+            cmd2 = ['ffmpeg', '-i', temp_video, '-i', subtitle_file, '-c',
+                    'copy', '-c:s', 'mov_text', final_output, '-y']
+            subprocess.run(cmd2, check=True, capture_output=True)
+            os.remove(temp_video)
+        print(f"Successfully created: {final_output}")
+    except subprocess.CalledProcessError as e:
+        print(f"FFmpeg Error: {e.stderr.decode()}")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+def main():
+    all_files = os.listdir('audio')
+    for i, file in enumerate(all_files):
+       print(f"[{i+1}] - {file}")
+    file_idx = int(input('Enter file index: '))
+    input_file_path = all_files[file_idx-1];
+    DOWNLOAD_DIR = "audio"
+    AUDIO_FILE=os.path.join(DOWNLOAD_DIR, input_file_path)
+    output_template = os.path.join(DOWNLOAD_DIR, '%(title)s.%(ext)s')
+    final_filepath = acquire_audio_file(AUDIO_FILE, DOWNLOAD_DIR, output_template)
+    if final_filepath and os.path.exists(final_filepath):
+        print(f"Processing audio file: {final_filepath}")
+        print(f"File size: {os.path.getsize(final_filepath) / 1024 / 1024:.2f} MB")
+        base_name = os.path.basename(final_filepath)
+        file_name_without_extension, _ = os.path.splitext(base_name)
+        FILE_NAME_FOR_TXT = file_name_without_extension
+        MODEL_NAME = "small"
+        print(f"\nLoading Whisper model: {MODEL_NAME}...")
+        try:
+            model = WhisperModel(MODEL_NAME, device="cpu", compute_type="int8")
+            batched_model = BatchedInferencePipeline(model=model)
+            print("Model loaded successfully.")
+            print("\nStarting transcription...")
+            start_time = time.time()
+            segments, info = batched_model.transcribe(
+                final_filepath,
+                batch_size=8,
+                beam_size=5,
+                word_timestamps=True
+            )
+            os.makedirs(DOWNLOAD_DIR, exist_ok=True)
+            transcript_filename = os.path.join(DOWNLOAD_DIR, f"{FILE_NAME_FOR_TXT}.srt")
+            subtitle_chunks = create_subtitle_chunks(segments, max_words=12, max_duration=4.0)
+            full_transcript_text = []
+            for chunk in subtitle_chunks:
+                start_time_formatted = format_time(chunk['start'])
+                end_time_formatted = format_time(chunk['end'])
+                line = f"{start_time_formatted} --> {end_time_formatted}\n{chunk['text']}"
+                full_transcript_text.append(line)
+            with open(transcript_filename, "w", encoding="utf-8") as f:
+                count = 1
+                for line in full_transcript_text:
+                    f.write(f"{count}\n{line}\n\n")
+                    count += 1
+            end_time = time.time()
+            processed_time = end_time - start_time
+            print(f"\nTranscription complete and saved to {transcript_filename}.")
+            print(f"Processed in {processed_time:.2f} seconds")
+            add_subtitles(final_filepath)
+        except Exception as e:
+            print(f"An error occurred during transcription: {e}")
+        finally:
+            if 'model' in locals():
+                del model
+            if 'batched_model' in locals():
+                del batched_model
+            print("Model resources released.")
+            import gc
+            gc.collect()
+    else:
+        print("Audio file acquisition failed (YouTube download or local file not found). Cannot proceed with transcription.")
+# Run the interactive workflow only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

Binary file (1.05 kB). View file

test.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import os
+all_files = os.listdir('audio')
+for i, file in enumerate(all_files):
+    print(f"[{i}] - {file}")
+file_idx = int(input('Enter file index: '))
+input_file_path = all_files[file_idx];
+DOWNLOAD_DIR = "audio"
+AUDIO_FILE=os.path.join(DOWNLOAD_DIR, input_file_path)
+print(AUDIO_FILE)
+print(DOWNLOAD_DIR)