Akhil373 committed on
Commit
08b5587
·
0 Parent(s):

first commit

Browse files
Files changed (6) hide show
  1. .gitignore +3 -0
  2. Dockerfile +12 -0
  3. docker-compose.yml +9 -0
  4. main.py +248 -0
  5. requirements.txt +0 -0
  6. test.py +14 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ audio/*
2
+ .venv
3
+ .idea
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.13-slim
2
+
3
+ RUN apt-get update && \
4
+ apt-get install -y --no-install-recommends ffmpeg && \
5
+ rm -rf /var/lib/apt/lists/*
6
+
7
+ WORKDIR /app
8
+
9
+ COPY requirements.txt .
10
+ RUN pip3 install --no-cache-dir -r requirements.txt
11
+
12
+ CMD ["tail", "-f", "/dev/null"]
docker-compose.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ app:
3
+ build: .
4
+ container_name: subtitle_generator_dev
5
+ volumes:
6
+ - .:/app
7
+ - /home/axle/.cache/huggingface/hub/models--Systran--faster-whisper-small:/root/.cache/huggingface/hub/models--Systran--faster-whisper-small
8
+ tty: true
9
+ stdin_open: true
main.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from faster_whisper import WhisperModel, BatchedInferencePipeline
2
+ import time
3
+ import os
4
+ import yt_dlp
5
+ import subprocess
6
+ import logging
7
+
8
# Configure the root logger with the library defaults, then raise the
# verbosity of the "faster_whisper" logger to DEBUG.
logging.basicConfig()
_whisper_logger = logging.getLogger("faster_whisper")
_whisper_logger.setLevel(logging.DEBUG)
10
+
11
+
12
def _resolve_downloaded_path(info, download_dir):
    """Return the downloaded file's path from a yt-dlp info dict, or None."""
    downloads = info.get('requested_downloads')
    if downloads:
        return downloads[0]['filepath']
    if '_filename' in info:
        return info['_filename']

    print("Warning: yt-dlp did not provide a clear filepath. Attempting to construct.")
    if 'title' in info and 'ext' in info:
        guessed_path = os.path.join(download_dir, f"{info['title']}.{info['ext']}")
        if os.path.exists(guessed_path):
            return guessed_path
    return None


def _download_youtube_audio(url, download_dir, output_template):
    """Download the audio track of ``url`` as m4a; return its path or None."""
    os.makedirs(download_dir, exist_ok=True)
    ydl_opts = {
        'outtmpl': output_template,
        'format': 'm4a/bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'm4a',
        }],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            print(f"Downloading from YouTube: {url}")
            info = ydl.extract_info(url, download=True)
            final_filepath = _resolve_downloaded_path(info, download_dir)
        except Exception as e:
            # Best-effort boundary: any download failure is reported and
            # turned into a None result for the caller to handle.
            print(f"An error occurred during YouTube download: {e}")
            return None

    # Reached for every "no usable path" case, so the result is always bound
    # (the original could hit an unassigned local here).
    if final_filepath is None:
        print(f"Could not determine downloaded file path for {url}.")
    return final_filepath


def acquire_audio_file(AUDIO_FILE, DOWNLOAD_DIR, output_template):
    """Resolve ``AUDIO_FILE`` to a media file on disk, downloading if needed.

    Args:
        AUDIO_FILE: Either a URL containing "youtube.com" or the name/path of
            a local media file.
        DOWNLOAD_DIR: Directory that receives downloads; it is also searched
            first when ``AUDIO_FILE`` is a local name.
        output_template: yt-dlp ``outtmpl`` pattern for the downloaded file.

    Returns:
        The path of the media file, or None when the download failed or no
        local file exists at either candidate location.
    """
    if "youtube.com" in AUDIO_FILE:
        return _download_youtube_audio(AUDIO_FILE, DOWNLOAD_DIR, output_template)

    # Local file: prefer DOWNLOAD_DIR/<name>, then the name as given.
    potential_path = os.path.join(DOWNLOAD_DIR, AUDIO_FILE)
    for candidate in (potential_path, AUDIO_FILE):
        if os.path.exists(candidate):
            print(f"Using local file: {candidate}")
            return candidate

    print(f"Local file not found at '{potential_path}' or as '{AUDIO_FILE}'")
    return None
61
+
62
+
63
def create_subtitle_chunks(segments, max_words=8, max_duration=5.0):
    """Split transcription segments into short subtitle cues.

    Segments that carry a non-empty ``words`` list are cut into chunks; a
    chunk is closed as soon as it holds ``max_words`` words or spans at least
    ``max_duration`` seconds (measured from its first word's start to the
    current word's end). Chunks never cross segment boundaries. Segments
    without word data become a single cue built from ``segment.start``,
    ``segment.end`` and ``segment.text``.

    Args:
        segments: Iterable of objects with ``start``, ``end``, ``text`` and
            optionally ``words`` (items with ``word``, ``start``, ``end``).
        max_words: Maximum number of words per chunk.
        max_duration: Duration in seconds at which a chunk is closed.

    Returns:
        A list of ``{'start', 'end', 'text'}`` dicts in input order. Text is
        stripped of surrounding whitespace and empty cues are dropped.
    """
    subtitle_chunks = []

    def emit(start, end, text):
        # Strip on every path so the fallback matches the word-level path,
        # and never emit a cue that would render as blank.
        text = (text or '').strip()
        if text:
            subtitle_chunks.append({'start': start, 'end': end, 'text': text})

    for segment in segments:
        words = getattr(segment, 'words', None)
        if not words:
            emit(segment.start, segment.end, segment.text)
            continue

        pending = []
        chunk_start = words[0].start
        for word in words:
            if not pending:
                # First word of a new chunk fixes the chunk's start time.
                chunk_start = word.start
            pending.append(word.word)

            if len(pending) >= max_words or word.end - chunk_start >= max_duration:
                emit(chunk_start, word.end, ''.join(pending))
                pending = []

        if pending:
            emit(chunk_start, words[-1].end, ''.join(pending))

    return subtitle_chunks
103
+
104
+
105
def format_time(seconds, offset=0.2):
    """Format a time in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The value is moved ``offset`` seconds earlier before formatting and
    clamped at zero, so times smaller than the offset yield ``00:00:00,000``
    instead of a negative, malformed timestamp.

    Args:
        seconds: Time in seconds.
        offset: Seconds subtracted from ``seconds`` before formatting.

    Returns:
        The timestamp string, rounded to the nearest millisecond.
    """
    # Work in integer milliseconds to avoid float truncation artefacts
    # (e.g. 2.3 - 0.2 == 2.0999999999999996 must still print ",100").
    total_ms = max(0, int(round((seconds - offset) * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
113
+
114
+
115
def add_subtitles(media_path):
    """Mux the sibling ``.srt`` file into ``media_path`` as a subtitle track.

    The subtitle file is expected next to the media file, with the same base
    name and a ``.srt`` extension. Behaviour by input type:

    * ``.mp4``: the file is replaced in place by the subtitled version.
    * ``.webm`` / ``.mpeg``: ``<base>_subtitled.mp4`` is written.
    * anything else is treated as audio: a black 1280x720 video is generated
      for it first, then ``<base>_subtitled.mp4`` is written.

    ffmpeg output goes to a temporary file that is moved into place only
    after ffmpeg succeeds; temporary files are removed on every exit path.
    Failures are printed, not raised.

    Args:
        media_path: Path of the audio or video file to subtitle.

    Returns:
        None.
    """
    base, ext = os.path.splitext(os.path.basename(media_path))
    dir_path = os.path.dirname(media_path)
    ext = ext.lower()

    subtitle_file = os.path.join(dir_path, f"{base}.srt")
    if not os.path.exists(subtitle_file):
        print(f"Error: Subtitle file not found at {subtitle_file}")
        return

    video_formats = ('.mp4', '.webm', '.mpeg')

    # An .mp4 input is overwritten; every other input yields a new file.
    if ext == ".mp4":
        output_path = media_path
    else:
        output_path = os.path.join(dir_path, f"{base}_subtitled.mp4")

    temp_output = os.path.join(dir_path, f"{base}_temp.mp4")
    temp_video = os.path.join(dir_path, f"{base}_blank_temp.mp4")

    try:
        if ext in video_formats:
            print('Found video file.')
            source = media_path
        else:
            print('Found audio file.')
            # Give the audio a (black) picture so it can carry subtitles.
            cmd_blank = ['ffmpeg', '-y', '-f', 'lavfi',
                         '-i', 'color=c=black:s=1280x720:r=5',
                         '-i', media_path, '-c:a', 'copy', '-shortest', temp_video]
            subprocess.run(cmd_blank, check=True, capture_output=True)
            source = temp_video

        cmd_mux = ['ffmpeg', '-y', '-i', source, '-i', subtitle_file,
                   '-c', 'copy', '-c:s', 'mov_text', temp_output]
        subprocess.run(cmd_mux, check=True, capture_output=True)

        # Atomic on the same filesystem; also overwrites an existing target.
        os.replace(temp_output, output_path)
        print(f"Successfully created: {output_path}")

    except subprocess.CalledProcessError as e:
        stderr = e.stderr.decode(errors="replace") if e.stderr else ""
        print(f"FFmpeg Error: {stderr}")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Never leave intermediates behind, whether ffmpeg succeeded or not.
        for leftover in (temp_video, temp_output):
            try:
                os.remove(leftover)
            except OSError:
                pass
160
+
161
+
162
+
163
def _prompt_for_file(all_files):
    """Print a 1-based menu of ``all_files``; return the chosen name or None."""
    for i, file in enumerate(all_files, start=1):
        print(f"[{i}] - {file}")

    try:
        file_idx = int(input('Enter file index: '))
    except ValueError:
        print("Invalid selection: please enter a number.")
        return None

    # Reject 0 and negatives explicitly: all_files[file_idx - 1] would
    # otherwise wrap around and silently pick a file from the end.
    if not 1 <= file_idx <= len(all_files):
        print(f"Invalid selection: index must be between 1 and {len(all_files)}.")
        return None
    return all_files[file_idx - 1]


def _write_srt(subtitle_chunks, transcript_filename):
    """Write ``subtitle_chunks`` to ``transcript_filename`` as numbered SRT cues."""
    with open(transcript_filename, "w", encoding="utf-8") as f:
        for count, chunk in enumerate(subtitle_chunks, start=1):
            start_time_formatted = format_time(chunk['start'])
            end_time_formatted = format_time(chunk['end'])
            f.write(f"{count}\n{start_time_formatted} --> {end_time_formatted}\n{chunk['text']}\n\n")


def main():
    """Interactively transcribe a file from ``audio`` and attach subtitles.

    Lists the entries of the ``audio`` directory, asks for a 1-based index,
    transcribes the chosen file with the "small" Whisper model on CPU, writes
    ``audio/<name>.srt`` and finally calls ``add_subtitles`` on the media
    file. Problems are reported with ``print``; nothing is raised for invalid
    selections or transcription failures.
    """
    import gc  # local import: only needed for the cleanup step below

    download_dir = "audio"
    try:
        # Sorted so the menu order is stable between runs.
        all_files = sorted(os.listdir(download_dir))
    except FileNotFoundError:
        print(f"Directory '{download_dir}' not found. Create it and add a media file.")
        return
    if not all_files:
        print(f"No files found in '{download_dir}'.")
        return

    chosen = _prompt_for_file(all_files)
    if chosen is None:
        return

    audio_file = os.path.join(download_dir, chosen)
    output_template = os.path.join(download_dir, '%(title)s.%(ext)s')
    final_filepath = acquire_audio_file(audio_file, download_dir, output_template)

    if not (final_filepath and os.path.exists(final_filepath)):
        print("Audio file acquisition failed (YouTube download or local file not found). Cannot proceed with transcription.")
        return

    print(f"Processing audio file: {final_filepath}")
    print(f"File size: {os.path.getsize(final_filepath) / 1024 / 1024:.2f} MB")

    file_name_without_extension, _ = os.path.splitext(os.path.basename(final_filepath))
    model_name = "small"

    print(f"\nLoading Whisper model: {model_name}...")
    model = batched_model = None
    try:
        model = WhisperModel(model_name, device="cpu", compute_type="int8")
        batched_model = BatchedInferencePipeline(model=model)
        print("Model loaded successfully.")

        print("\nStarting transcription...")
        start_time = time.time()

        segments, info = batched_model.transcribe(
            final_filepath,
            batch_size=8,
            beam_size=5,
            word_timestamps=True,
        )

        os.makedirs(download_dir, exist_ok=True)
        transcript_filename = os.path.join(download_dir, f"{file_name_without_extension}.srt")

        subtitle_chunks = create_subtitle_chunks(segments, max_words=12, max_duration=4.0)
        _write_srt(subtitle_chunks, transcript_filename)

        processed_time = time.time() - start_time

        print(f"\nTranscription complete and saved to {transcript_filename}.")
        print(f"Processed in {processed_time:.2f} seconds")

        add_subtitles(final_filepath)

    except Exception as e:
        # Top-level boundary of the script: report and fall through to cleanup.
        print(f"An error occurred during transcription: {e}")

    finally:
        # Drop the references before collecting so the model memory can go.
        model = batched_model = None
        print("Model resources released.")
        gc.collect()


if __name__ == "__main__":
    main()
requirements.txt ADDED
Binary file (1.05 kB). View file
 
test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Scratch script: show the entries of the "audio" directory with 0-based
# indices, read one index from stdin, and print the resulting path followed
# by the directory name.
DOWNLOAD_DIR = "audio"
all_files = os.listdir(DOWNLOAD_DIR)

for i, file in enumerate(all_files, start=0):
    print(f"[{i}] - {file}")

file_idx = int(input('Enter file index: '))
input_file_path = all_files[file_idx]
AUDIO_FILE = os.path.join(DOWNLOAD_DIR, input_file_path)

print(AUDIO_FILE, DOWNLOAD_DIR, sep="\n")