"""
Subtitle Extractor Module

Extracts subtitles from videos using OCR and generates SRT files.
"""

import os
import sys
from collections import defaultdict
from pathlib import Path

import cv2

# Add backend to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from backend.main import SubtitleDetect


class SubtitleExtractor:
    """Extract subtitles from a video and generate SRT files.

    The extractor owns a ``cv2.VideoCapture`` for the video. Call
    :meth:`release` (or use the instance as a context manager) to free it
    once extraction is finished.
    """

    # OCR results at or below this confidence are discarded.
    MIN_CONFIDENCE = 0.5

    # Two consecutive ranges with identical text are merged into one
    # subtitle when the gap between them is shorter than this (seconds).
    MAX_MERGE_GAP_SECONDS = 1.0

    def __init__(self, video_path, sub_area=None):
        """
        Initialize subtitle extractor

        Args:
            video_path: Path to video file
            sub_area: Optional subtitle area (ymin, ymax, xmin, xmax)
        """
        self.video_path = video_path
        self.sub_area = sub_area
        self.detector = SubtitleDetect(video_path, sub_area)

        # Get video properties
        self.video_cap = cv2.VideoCapture(video_path)
        self.fps = self.video_cap.get(cv2.CAP_PROP_FPS)
        self.frame_count = int(self.video_cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def release(self):
        """Release the underlying video capture. Safe to call repeatedly."""
        capture = getattr(self, 'video_cap', None)
        if capture is not None:
            capture.release()

    def __enter__(self):
        """Return the extractor itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the video capture on exit; exceptions are not suppressed."""
        self.release()
        return False

    @property
    def text_recognizer(self):
        """Lazy load PaddleOCR text recognizer

        The recognizer is built on first access and cached on the instance.
        The heavy paddle / paddleocr imports are deferred until then, and
        ``config`` is reloaded so the current settings are used.
        """
        if not hasattr(self, '_text_recognizer'):
            import paddle
            paddle.disable_signal_handler()
            from paddleocr.tools.infer import utility
            from paddleocr.tools.infer.predict_rec import TextRecognizer
            import importlib
            import config
            importlib.reload(config)

            args = utility.parse_args()
            args.rec_algorithm = 'CRNN'
            # Prefer an explicit recognition model path; otherwise derive it
            # from the detection model base directory and model version.
            if hasattr(config, 'REC_MODEL_PATH'):
                args.rec_model_dir = config.REC_MODEL_PATH
            else:
                args.rec_model_dir = os.path.join(
                    config.DET_MODEL_BASE, config.MODEL_VERSION, 'ch_rec'
                )
            args.use_onnx = len(config.ONNX_PROVIDERS) > 0
            args.onnx_providers = config.ONNX_PROVIDERS
            self._text_recognizer = TextRecognizer(args)
        return self._text_recognizer

    def extract_text_from_frame(self, frame, boxes):
        """
        Extract text from frame using OCR

        Args:
            frame: Video frame (numpy array)
            boxes: List of detected text boxes
                [(xmin, xmax, ymin, ymax), ...]

        Returns:
            List of extracted text strings, one per box whose OCR
            confidence exceeds ``MIN_CONFIDENCE``. Boxes that crop to an
            empty region, or for which OCR raises, are skipped.
        """
        texts = []
        for box in boxes:
            xmin, xmax, ymin, ymax = box

            # Clamp to the frame origin: a negative index would wrap around
            # to the opposite edge and crop the wrong region. Upper bounds
            # past the frame edge are already truncated by slicing.
            left, right = max(0, xmin), max(0, xmax)
            top, bottom = max(0, ymin), max(0, ymax)

            # Crop text region
            text_region = frame[top:bottom, left:right]
            if text_region.size == 0:
                continue

            try:
                # Run OCR on cropped region
                rec_result, _ = self.text_recognizer([text_region])
                if rec_result and len(rec_result) > 0:
                    text, confidence = rec_result[0]
                    if confidence > self.MIN_CONFIDENCE:
                        texts.append(text)
            except Exception as e:
                # Best effort: one unreadable box must not abort the frame.
                print(f"Warning: OCR failed for box {box}: {e}")
                continue

        return texts

    def format_timestamp(self, seconds):
        """
        Convert seconds to SRT timestamp format (HH:MM:SS,mmm)

        The value is rounded to the nearest millisecond using integer
        arithmetic, so floating-point error cannot drop a millisecond
        (e.g. 1.001 s is rendered as ``00:00:01,001``). Negative input is
        clamped to zero.

        Args:
            seconds: Time in seconds (float)

        Returns:
            Formatted timestamp string
        """
        total_millis = max(0, round(seconds * 1000))
        hours, remainder = divmod(total_millis, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def extract_subtitles(self, progress_callback=None):
        """
        Extract subtitles with OCR and timestamps

        Runs the detector, OCRs the first frame of every continuous range
        it reports, and merges consecutive ranges that carry the same text
        and are less than ``MAX_MERGE_GAP_SECONDS`` apart.

        Args:
            progress_callback: Optional callback function for progress
                updates, called as ``progress_callback(fraction, message)``
                with ``fraction`` in [0.0, 1.0]

        Returns:
            List of subtitle dictionaries with 'start', 'end', 'text' keys
            ('start' and 'end' are in seconds)

        Raises:
            ZeroDivisionError: If subtitles were detected but the capture
                reports an FPS of 0, since timestamps cannot be computed.
        """
        print("[Subtitle Extractor] Starting subtitle extraction...")

        # Detect subtitle regions
        subtitle_frame_dict = self.detector.find_subtitle_frame_no()

        if not subtitle_frame_dict:
            print("[Subtitle Extractor] No subtitles detected!")
            return []

        print(f"[Subtitle Extractor] Found subtitles in {len(subtitle_frame_dict)} frames")

        # Group continuous frames with same text
        subtitles = []
        current_subtitle = None

        # Reset video capture
        self.video_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

        # Find continuous ranges
        continuous_ranges = self.detector.find_continuous_ranges_with_same_mask(subtitle_frame_dict)

        for start_frame, end_frame in continuous_ranges:
            # The code seeks to start_frame - 1, which suggests detector
            # frame numbers are 1-based (TODO confirm against the detector).
            # Clamp so a 0 start never yields a negative seek or timestamp.
            start_index = max(0, start_frame - 1)

            # Seek to start frame
            self.video_cap.set(cv2.CAP_PROP_POS_FRAMES, start_index)
            ret, frame = self.video_cap.read()

            if not ret:
                continue

            # Get boxes for this frame
            boxes = subtitle_frame_dict.get(start_frame, [])

            # Extract text
            texts = self.extract_text_from_frame(frame, boxes)
            combined_text = " ".join(texts).strip()

            if not combined_text:
                continue

            # Calculate timestamps
            start_time = start_index / self.fps
            end_time = end_frame / self.fps

            # Merge with previous if same text and continuous
            if (current_subtitle
                    and current_subtitle['text'] == combined_text
                    and abs(start_time - current_subtitle['end']) < self.MAX_MERGE_GAP_SECONDS):
                # Extend end time
                current_subtitle['end'] = end_time
            else:
                # Add previous subtitle if exists
                if current_subtitle:
                    subtitles.append(current_subtitle)

                # Start new subtitle
                current_subtitle = {
                    'start': start_time,
                    'end': end_time,
                    'text': combined_text,
                }

            if progress_callback:
                # Some containers report a frame count of 0 (or one that is
                # too small); avoid dividing by zero and never exceed 100%.
                if self.frame_count > 0:
                    progress = min(1.0, end_frame / self.frame_count)
                else:
                    progress = 0.0
                progress_callback(progress, f"Extracting subtitles: {len(subtitles)+1} found")

        # Add last subtitle
        if current_subtitle:
            subtitles.append(current_subtitle)

        print(f"[Subtitle Extractor] Extracted {len(subtitles)} subtitle segments")
        return subtitles

    def generate_srt(self, subtitles, output_path):
        """
        Generate SRT file from subtitles

        Each entry is written as a 1-based index line, a
        ``start --> end`` timestamp line, the text, and a blank line.

        Args:
            subtitles: List of subtitle dictionaries
                ('start', 'end', 'text' keys)
            output_path: Path to save SRT file

        Returns:
            Path to generated SRT file (``output_path`` unchanged)
        """
        print(f"[Subtitle Extractor] Generating SRT file: {output_path}")

        with open(output_path, 'w', encoding='utf-8') as f:
            for i, sub in enumerate(subtitles, 1):
                # Subtitle number
                f.write(f"{i}\n")

                # Timestamps
                start_ts = self.format_timestamp(sub['start'])
                end_ts = self.format_timestamp(sub['end'])
                f.write(f"{start_ts} --> {end_ts}\n")

                # Text
                f.write(f"{sub['text']}\n")

                # Blank line
                f.write("\n")

        print(f"[Subtitle Extractor] SRT file saved: {output_path}")
        return output_path

    def extract_to_srt(self, output_path=None, progress_callback=None):
        """
        Complete extraction pipeline: detect -> OCR -> generate SRT

        Args:
            output_path: Optional custom output path for SRT file. Defaults
                to ``<video name>_subtitles.srt`` next to the video.
            progress_callback: Optional callback for progress updates

        Returns:
            Path to generated SRT file (as a string). When no subtitles are
            found, a placeholder file with a single comment line is written.
        """
        # Default output path
        if output_path is None:
            video_file = Path(self.video_path)
            output_path = video_file.parent / f"{video_file.stem}_subtitles.srt"

        # Extract subtitles
        subtitles = self.extract_subtitles(progress_callback)

        if not subtitles:
            # Create empty SRT
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write("# No subtitles detected\n")
            return str(output_path)

        # Generate SRT
        return self.generate_srt(subtitles, str(output_path))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: python subtitle_extractor.py <video_path>")
        sys.exit(1)

    video_path = sys.argv[1]
    # Release the video capture even if extraction fails.
    with SubtitleExtractor(video_path) as extractor:
        srt_path = extractor.extract_to_srt()
    print(f"Subtitles extracted to: {srt_path}")