Rasta02 committed on
Commit
e53e238
·
verified ·
1 Parent(s): 1c865d3

Upload backend/subtitle_extractor.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. backend/subtitle_extractor.py +265 -0
backend/subtitle_extractor.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Subtitle Extractor Module
3
+ Extracts subtitles from videos using OCR and generates SRT files
4
+ """
5
+
6
+ import cv2
7
+ import sys
8
+ import os
9
+ from pathlib import Path
10
+ from collections import defaultdict
11
+
12
+ # Add backend to path
13
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
14
+
15
+ from backend.main import SubtitleDetect
16
+
17
+
18
class SubtitleExtractor:
    """Extract hard-coded subtitles from a video via OCR and generate SRT files.

    Subtitle-region detection is delegated to ``SubtitleDetect`` (imported from
    ``backend.main``); text recognition uses a lazily constructed PaddleOCR
    ``TextRecognizer``.
    """

    def __init__(self, video_path, sub_area=None):
        """
        Initialize subtitle extractor.

        Args:
            video_path: Path to video file.
            sub_area: Optional subtitle area (ymin, ymax, xmin, xmax).
        """
        self.video_path = video_path
        self.sub_area = sub_area
        self.detector = SubtitleDetect(video_path, sub_area)

        # The capture stays open for seeking during extraction; release it
        # with close() (also attempted from __del__) so the underlying OS
        # handle is not leaked.
        self.video_cap = cv2.VideoCapture(video_path)
        self.fps = self.video_cap.get(cv2.CAP_PROP_FPS)
        self.frame_count = int(self.video_cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def close(self):
        """Release the OpenCV video capture. Safe to call more than once."""
        cap = getattr(self, 'video_cap', None)
        if cap is not None:
            cap.release()
            self.video_cap = None

    def __del__(self):
        # Best-effort cleanup; a finalizer must never raise.
        try:
            self.close()
        except Exception:
            pass

    @property
    def text_recognizer(self):
        """Lazily build and cache the PaddleOCR text recognizer."""
        if not hasattr(self, '_text_recognizer'):
            import paddle
            paddle.disable_signal_handler()
            from paddleocr.tools.infer import utility
            from paddleocr.tools.infer.predict_rec import TextRecognizer
            import importlib
            import config
            # Reload so config edits made after process start are honored.
            importlib.reload(config)

            args = utility.parse_args()
            args.rec_algorithm = 'CRNN'
            # Prefer an explicit REC_MODEL_PATH; otherwise fall back to the
            # conventional ch_rec directory under the detection model base.
            args.rec_model_dir = config.REC_MODEL_PATH if hasattr(config, 'REC_MODEL_PATH') else os.path.join(config.DET_MODEL_BASE, config.MODEL_VERSION, 'ch_rec')
            args.use_onnx = len(config.ONNX_PROVIDERS) > 0
            args.onnx_providers = config.ONNX_PROVIDERS

            self._text_recognizer = TextRecognizer(args)
        return self._text_recognizer

    def extract_text_from_frame(self, frame, boxes):
        """
        Extract text from a frame using OCR.

        Args:
            frame: Video frame (numpy array).
            boxes: List of detected text boxes [(xmin, xmax, ymin, ymax), ...].

        Returns:
            List of extracted text strings (only recognitions with
            confidence above 0.5 are kept).
        """
        texts = []

        for box in boxes:
            xmin, xmax, ymin, ymax = box

            # Crop the text region; a degenerate/out-of-range box yields an
            # empty array and is skipped.
            text_region = frame[ymin:ymax, xmin:xmax]
            if text_region.size == 0:
                continue

            try:
                # Run OCR on the cropped region.
                rec_result, _ = self.text_recognizer([text_region])
                if rec_result and len(rec_result) > 0:
                    text, confidence = rec_result[0]
                    # Only accept if confidence > 50% to reduce garbage text.
                    if confidence > 0.5:
                        texts.append(text)
            except Exception as e:
                # An OCR failure on one box must not abort the whole frame.
                print(f"Warning: OCR failed for box {box}: {e}")
                continue

        return texts

    def format_timestamp(self, seconds):
        """
        Convert seconds to SRT timestamp format (HH:MM:SS,mmm).

        Args:
            seconds: Time in seconds (float).

        Returns:
            Formatted timestamp string.
        """
        # Round to the nearest millisecond first so e.g. 1.9996 s becomes
        # 00:00:02,000 instead of truncating to 00:00:01,999.
        total_millis = int(round(seconds * 1000))
        total_secs, millis = divmod(total_millis, 1000)
        hours, rem = divmod(total_secs, 3600)
        minutes, secs = divmod(rem, 60)

        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def extract_subtitles(self, progress_callback=None):
        """
        Extract subtitles with OCR and timestamps.

        Args:
            progress_callback: Optional callable(progress: float, message: str)
                invoked after each processed subtitle range.

        Returns:
            List of subtitle dictionaries with 'start', 'end', 'text' keys
            (times in seconds).

        Raises:
            ValueError: If the video reports a non-positive FPS (timestamps
                would be undefined).
        """
        print("[Subtitle Extractor] Starting subtitle extraction...")

        # Guard: a broken container can report fps == 0, which would raise a
        # bare ZeroDivisionError below — fail with a clear message instead.
        if not self.fps or self.fps <= 0:
            raise ValueError(f"Invalid FPS ({self.fps}) for video: {self.video_path}")

        # Detect subtitle regions per frame.
        subtitle_frame_dict = self.detector.find_subtitle_frame_no()

        if not subtitle_frame_dict:
            print("[Subtitle Extractor] No subtitles detected!")
            return []

        print(f"[Subtitle Extractor] Found subtitles in {len(subtitle_frame_dict)} frames")

        # Group continuous frames with same text.
        subtitles = []
        current_subtitle = None

        # Reset video capture to the beginning before seeking.
        self.video_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

        # Find continuous ranges of frames sharing the same subtitle mask.
        continuous_ranges = self.detector.find_continuous_ranges_with_same_mask(subtitle_frame_dict)

        for start_frame, end_frame in continuous_ranges:
            # Seek to the first frame of the range (frame numbers are 1-based).
            self.video_cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 1)
            ret, frame = self.video_cap.read()

            if not ret:
                continue

            # OCR only the boxes detected for the range's first frame — the
            # range shares one mask, so one frame is representative.
            boxes = subtitle_frame_dict.get(start_frame, [])
            texts = self.extract_text_from_frame(frame, boxes)
            combined_text = " ".join(texts).strip()

            if not combined_text:
                continue

            # Calculate timestamps from 1-based frame numbers.
            start_time = (start_frame - 1) / self.fps
            end_time = end_frame / self.fps

            # Merge with the previous segment when the text is identical and
            # the gap is under one second (handles detection flicker).
            if (current_subtitle and
                    current_subtitle['text'] == combined_text and
                    abs(start_time - current_subtitle['end']) < 1.0):
                # Extend the previous segment's end time.
                current_subtitle['end'] = end_time
            else:
                # Flush the previous segment, then start a new one.
                if current_subtitle:
                    subtitles.append(current_subtitle)

                current_subtitle = {
                    'start': start_time,
                    'end': end_time,
                    'text': combined_text
                }

            if progress_callback:
                progress = end_frame / self.frame_count
                # +1 counts the in-progress segment not yet flushed to the list.
                progress_callback(progress, f"Extracting subtitles: {len(subtitles)+1} found")

        # Flush the last pending segment.
        if current_subtitle:
            subtitles.append(current_subtitle)

        print(f"[Subtitle Extractor] Extracted {len(subtitles)} subtitle segments")
        return subtitles

    def generate_srt(self, subtitles, output_path):
        """
        Generate an SRT file from subtitles.

        Args:
            subtitles: List of subtitle dictionaries ('start', 'end', 'text').
            output_path: Path to save SRT file.

        Returns:
            Path to the generated SRT file (same as output_path).
        """
        print(f"[Subtitle Extractor] Generating SRT file: {output_path}")

        with open(output_path, 'w', encoding='utf-8') as f:
            for i, sub in enumerate(subtitles, 1):
                # SRT entry: index, "start --> end" line, text, blank line.
                f.write(f"{i}\n")

                start_ts = self.format_timestamp(sub['start'])
                end_ts = self.format_timestamp(sub['end'])
                f.write(f"{start_ts} --> {end_ts}\n")

                f.write(f"{sub['text']}\n")
                f.write("\n")

        print(f"[Subtitle Extractor] SRT file saved: {output_path}")
        return output_path

    def extract_to_srt(self, output_path=None, progress_callback=None):
        """
        Complete extraction pipeline: detect -> OCR -> generate SRT.

        Args:
            output_path: Optional custom output path for the SRT file;
                defaults to "<video_name>_subtitles.srt" beside the video.
            progress_callback: Optional callback for progress updates.

        Returns:
            Path (str) to the generated SRT file.
        """
        # Default output path: next to the input video.
        if output_path is None:
            video_name = Path(self.video_path).stem
            output_dir = Path(self.video_path).parent
            output_path = output_dir / f"{video_name}_subtitles.srt"

        subtitles = self.extract_subtitles(progress_callback)

        if not subtitles:
            # Write a placeholder file so callers always get a path that exists.
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write("# No subtitles detected\n")
            return str(output_path)

        return self.generate_srt(subtitles, str(output_path))
255
+
256
if __name__ == '__main__':
    import sys

    # Minimal CLI: exactly one positional argument naming the input video.
    if len(sys.argv) < 2:
        print("Usage: python subtitle_extractor.py <video_path>")
        sys.exit(1)

    input_video = sys.argv[1]
    srt_path = SubtitleExtractor(input_video).extract_to_srt()
    print(f"Subtitles extracted to: {srt_path}")