OCR-Sub

Sleeping

App Files Files Community

OCR-Sub / app.py

hoanglinhn0

Update app.py

01dfd90 verified 2 months ago

raw

history blame contribute delete

9.57 kB

	# -*- coding: utf-8 -*-
	import time
	import tempfile
	import os
	from difflib import SequenceMatcher

	import cv2
	import numpy as np
	import streamlit as st
	from rapidocr_onnxruntime import RapidOCR

	# 1. MOBILE PAGE CONFIGURATION
	# Sets the page title and selects the "centered" layout before any other
	# Streamlit element is emitted by this script.
	st.set_page_config(page_title="OCR Android: Chậm & Chắc", layout="centered")

	# --- CACHE MODEL ---
	@st.cache_resource
	def load_ocr_model():
	    """Construct the RapidOCR engine used for subtitle recognition.

	    The function is wrapped in ``st.cache_resource``, so the engine object is
	    built through Streamlit's resource cache rather than on every call.

	    Returns:
	        A ``RapidOCR`` instance created with default arguments.
	    """
	    engine = RapidOCR()
	    return engine

	# --- HÀM HỖ TRỢ ---
	def similar(a, b):
	    """Return the ``difflib`` similarity ratio between ``a`` and ``b``.

	    Args:
	        a: First sequence (the callers in this file pass strings).
	        b: Second sequence.

	    Returns:
	        A float in ``[0, 1]``; ``1.0`` means the sequences are identical.
	    """
	    matcher = SequenceMatcher(a=a, b=b)
	    return matcher.ratio()

	def format_timestamp(seconds):
	    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

	    The offset is converted to a whole number of milliseconds once, by
	    rounding, and every field is derived from that integer. Subtracting the
	    integer part first and truncating loses a millisecond to floating-point
	    error (``1.001`` would render as ``00:00:01,000``).

	    Args:
	        seconds: Offset from the start of the video, in seconds. Negative
	            values are clamped to zero because SRT has no negative times.

	    Returns:
	        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
	    """
	    total_ms = max(0, int(round(float(seconds) * 1000)))
	    total_secs, millis = divmod(total_ms, 1000)
	    mins, secs = divmod(total_secs, 60)
	    hours, mins = divmod(mins, 60)
	    return f"{hours:02}:{mins:02}:{secs:02},{millis:03}"

	def get_video_info(video_path):
	    """Read basic properties of a video file through OpenCV.

	    Args:
	        video_path: Path handed to ``cv2.VideoCapture``.

	    Returns:
	        ``(width, height, fps, total_frames)`` where width, height and
	        total_frames are ints and fps is whatever ``CAP_PROP_FPS`` reports,
	        or ``None`` when the capture could not be opened.
	    """
	    cap = cv2.VideoCapture(video_path)
	    try:
	        if not cap.isOpened():
	            return None
	        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
	        fps = cap.get(cv2.CAP_PROP_FPS)
	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        return width, height, fps, total_frames
	    finally:
	        # Release on every path, including the not-opened early return and
	        # any exception raised while reading a property.
	        cap.release()

	# --- ENGINE XỬ LÝ (ĐÃ TINH CHỈNH ĐỂ BẮT DÍNH MỌI CHỮ) ---
	def extract_subtitles(video_path, ocr_engine, crop_ratio, frame_skip, conf_thresh, use_smart_filter, progress_bar, status_text):
	    """OCR the bottom strip of sampled video frames and merge the text into cues.

	    Args:
	        video_path: Path handed to ``cv2.VideoCapture``.
	        ocr_engine: Callable invoked as ``ocr_engine(image)``. It must return a
	            pair whose first item is either falsy or an iterable of entries
	            with the text at index 1 and the confidence at index 2.
	        crop_ratio: Fraction of the frame height, measured from the bottom,
	            that is cropped and sent to OCR.
	        frame_skip: Only frames whose index is a multiple of this value are
	            analysed. Values below 1 are treated as 1.
	        conf_thresh: Minimum confidence for a recognised line to be kept.
	        use_smart_filter: When true, a sampled strip that barely differs from
	            the previous sampled strip reuses the previous text instead of
	            running OCR again.
	        progress_bar: Object exposing ``progress(fraction)``.
	        status_text: Object exposing ``text(message)``.

	    Returns:
	        A list of dicts with keys ``index`` (1-based), ``start``, ``end``
	        (both formatted by ``format_timestamp``) and ``text``. The list is
	        empty when the video cannot be opened or no text is found.

	    Raises:
	        ValueError: If the video opens but reports a non-positive frame rate,
	            which makes timestamps impossible to compute.
	    """
	    cap = cv2.VideoCapture(video_path)
	    try:
	        if not cap.isOpened():
	            return []

	        fps = cap.get(cv2.CAP_PROP_FPS)
	        if not fps or fps <= 0:
	            # Without a frame rate every timestamp would divide by zero.
	            raise ValueError("Không đọc được FPS của video")

	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
	        y_start = int(height * (1 - crop_ratio))
	        step = max(1, int(frame_skip))  # guards the modulo below against 0

	        subs = []
	        current_sub = None
	        prev_roi_gray = None
	        last_text = ""
	        frame_idx = 0
	        sampled = 0

	        while cap.isOpened():
	            ret, frame = cap.read()
	            if not ret:
	                break

	            # Frame skipping: only every ``step``-th frame is analysed.
	            if frame_idx % step != 0:
	                frame_idx += 1
	                continue

	            sampled += 1
	            if sampled % 20 == 0:
	                # Some containers report a frame count of 0 (or less); show
	                # 0% in that case instead of dividing by it.
	                if total_frames > 0:
	                    prog = min(max(frame_idx / total_frames, 0.0), 1.0)
	                else:
	                    prog = 0.0
	                progress_bar.progress(prog)
	                # Show the current second so the user can see it is advancing.
	                current_sec = int(frame_idx / fps)
	                status_text.text(f"🔍 Đang soi kỹ... {int(prog*100)}% (Giây thứ: {current_sec})")

	            # 1. Crop the subtitle strip.
	            roi = frame[y_start:height, :]

	            # 2. Smart filter: reuse the previous text when the strip has
	            #    barely changed since the previous sampled frame.
	            text = None
	            if use_smart_filter:
	                roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
	                if prev_roi_gray is not None and _roi_unchanged(roi_gray, prev_roi_gray):
	                    text = last_text
	                prev_roi_gray = roi_gray

	            # 3. Run OCR when the filter is off or the strip changed.
	            if text is None:
	                res, _ = ocr_engine(roi)
	                text = _confident_text(res, conf_thresh)
	                last_text = text

	            timestamp = frame_idx / fps

	            # 4. Merge consecutive readings into cues.
	            if not text:
	                # Gap: close whatever cue was open.
	                _keep_if_long_enough(subs, current_sub)
	                current_sub = None
	            elif current_sub is None:
	                current_sub = {'start': timestamp, 'end': timestamp, 'text': text}
	            elif similar(text, current_sub['text']) > 0.70:
	                # Same line (over 70% similar): extend it and keep the longer text.
	                current_sub['end'] = timestamp
	                if len(text) > len(current_sub['text']):
	                    current_sub['text'] = text
	            else:
	                # Different line: store the old cue and start a new one.
	                _keep_if_long_enough(subs, current_sub)
	                current_sub = {'start': timestamp, 'end': timestamp, 'text': text}

	            frame_idx += 1

	        _keep_if_long_enough(subs, current_sub)
	    finally:
	        # Release the capture even when OCR or the progress widgets raise.
	        cap.release()

	    # Format the result.
	    return [
	        {
	            "index": i + 1,
	            "start": format_timestamp(s['start']),
	            "end": format_timestamp(s['end']),
	            "text": s['text'],
	        }
	        for i, s in enumerate(subs)
	    ]


	def _confident_text(ocr_result, conf_thresh):
	    """Join the OCR entries whose confidence reaches ``conf_thresh`` into one line."""
	    if not ocr_result:
	        return ""
	    kept = [line[1] for line in ocr_result if float(line[2]) >= conf_thresh]
	    return " ".join(kept).strip()


	def _roi_unchanged(roi_gray, prev_roi_gray):
	    """Return True when under 3% of pixels differ by more than 30 grey levels."""
	    if roi_gray.size == 0:
	        return False
	    try:
	        diff = cv2.absdiff(roi_gray, prev_roi_gray)
	    except cv2.error:
	        # The strips cannot be compared; fall back to running OCR.
	        return False
	    return np.count_nonzero(diff > 30) / roi_gray.size < 0.03


	def _keep_if_long_enough(subs, sub):
	    """Append ``sub`` to ``subs`` when it exists and spans more than 0.1 s."""
	    if sub and sub['end'] - sub['start'] > 0.1:
	        subs.append(sub)

	def generate_srt_content(subs):
	    """Render subtitle cues as the text of an SRT file.

	    Args:
	        subs: Iterable of dicts with the keys ``index``, ``start``, ``end``
	            and ``text``.

	    Returns:
	        One string in which each cue is written as its index line, a
	        ``start --> end`` line and its text line, followed by a blank line.
	        An empty input yields an empty string.
	    """
	    # A single join avoids re-copying the growing string on every cue.
	    return "".join(
	        f"{sub['index']}\n{sub['start']} --> {sub['end']}\n{sub['text']}\n\n"
	        for sub in subs
	    )

	# --- GIAO DIỆN ANDROID ---

	st.markdown("### 📱 Video OCR (Chậm mà Chắc)")

	uploaded_file = st.file_uploader("Chọn Video:", type=["mp4", "avi", "mkv"])


	def _discard_cached_video():
	    """Delete the temp copy recorded in the session state, if there is one."""
	    if "video_cache" not in st.session_state:
	        return
	    stale_path = st.session_state["video_cache"]["path"]
	    del st.session_state["video_cache"]
	    try:
	        os.remove(stale_path)
	    except OSError:
	        pass  # already gone; nothing left to clean up


	if uploaded_file is None:
	    # The upload was cleared: drop the temp copy made for it.
	    _discard_cached_video()
	else:
	    # Streamlit reruns this script on every widget interaction. The upload
	    # is therefore copied to disk only once per distinct file and the path
	    # is remembered in the session state; copying on every rerun would leave
	    # a new, never-deleted temp file behind each time.
	    upload_key = (uploaded_file.name, uploaded_file.size)
	    cached = st.session_state["video_cache"] if "video_cache" in st.session_state else None
	    if cached is None or cached["key"] != upload_key or not os.path.exists(cached["path"]):
	        _discard_cached_video()

	        # Keep the real extension (.mp4/.avi/.mkv) on the temp file.
	        suffix = os.path.splitext(uploaded_file.name)[1].lower() or ".mp4"
	        chunk_size = 10 * 1024 * 1024
	        uploaded_file.seek(0)
	        with st.status("Đang chuẩn bị...", expanded=True) as status:
	            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tfile:
	                while True:
	                    chunk = uploaded_file.read(chunk_size)
	                    if not chunk:
	                        break
	                    tfile.write(chunk)
	            status.update(label="Đã xong!", state="complete", expanded=False)
	        st.session_state["video_cache"] = {"key": upload_key, "path": tfile.name}

	    video_path = st.session_state["video_cache"]["path"]

	    # get_video_info returns None for a file OpenCV cannot open; report that
	    # instead of silently rendering nothing.
	    info = None
	    try:
	        info = get_video_info(video_path)
	    except Exception as e:
	        st.error(f"Lỗi: {e}")
	    else:
	        if info is None:
	            st.error("❌ Không mở được video. Hãy thử file khác.")

	    width = info[0] if info else None

	    if width:
	        width, height, fps, total_frames = info

	        st.write("---")
	        st.write("#### 1. Chỉnh Vạch Đỏ (Quan trọng nhất)")

	        # Preview frame taken 20% into the video (clamped for containers
	        # that report a frame count of 0 or less).
	        preview_frame_idx = max(0, int(total_frames * 0.2))
	        cap = cv2.VideoCapture(video_path)
	        try:
	            cap.set(cv2.CAP_PROP_POS_FRAMES, preview_frame_idx)
	            ret, frame = cap.read()
	        finally:
	            cap.release()

	        if "crop_val" not in st.session_state:
	            st.session_state.crop_val = 0.30

	        # +/- stepper for the crop line position.
	        c1, c2 = st.columns([1, 1])
	        with c1:
	            crop_ratio = st.number_input("Vị trí vạch đỏ:",
	                                         min_value=0.1, max_value=0.6,
	                                         value=st.session_state.crop_val,
	                                         step=0.01, format="%.2f")
	        with c2:
	            st.info("Bấm (+) (-) để chỉnh. Vạch đỏ phải nằm NGAY TRÊN ĐẦU dòng chữ.")

	        if ret:
	            # Shrink the preview so it fits a phone screen.
	            display_scale = 400 / width if width > 400 else 1.0
	            small_h = int(height * display_scale)
	            preview_small = cv2.resize(frame, (int(width*display_scale), small_h))

	            # Red line marking the top edge of the strip that will be OCR'd.
	            line_y = int(small_h * (1 - crop_ratio))
	            cv2.line(preview_small, (0, line_y), (preview_small.shape[1], line_y), (0, 0, 255), 2)

	            st.image(preview_small, channels="BGR", caption="Ảnh xem trước")

	        st.write("---")
	        st.write("#### 2. Cấu hình quét")

	        # --- Settings for users who are losing text ---
	        c3, c4 = st.columns([1, 1])
	        with c3:
	            # Smaller skip values (2 or 3) scan more frames so less is missed.
	            frame_skip = st.selectbox("Tốc độ (Skip):", [2, 3, 5, 10], index=1,
	                                      help="Chọn 2 hoặc 3 để quét kỹ từng chút (Lâu hơn nhưng ra đủ chữ).")
	        with c4:
	            # Low default (0.3) so faint text is still accepted; the bounds
	            # enforce the range the label promises.
	            conf_thresh = st.number_input("Độ nhạy (0.1-1.0):",
	                                          min_value=0.1, max_value=1.0,
	                                          value=0.3, step=0.1)

	        # Switch for the frame-difference speed-up filter.
	        use_smart_filter = st.checkbox("⚡ Dùng bộ lọc tăng tốc (Tắt nếu bị mất chữ)", value=False)
	        if not use_smart_filter:
	            st.caption("🐢 Đang tắt bộ lọc: Máy sẽ quét kỹ từng khung hình (Sẽ lâu hơn nhưng chính xác nhất).")

	        # Run button.
	        if st.button("🚀 BẮT ĐẦU QUÉT", type="primary", use_container_width=True):
	            try:
	                ocr_engine = load_ocr_model()
	                prog_bar = st.progress(0)
	                status_txt = st.empty()

	                subs = extract_subtitles(video_path, ocr_engine, crop_ratio, frame_skip, conf_thresh, use_smart_filter, prog_bar, status_txt)

	                prog_bar.progress(100)

	                if subs:
	                    st.success(f"✅ Xong! Tìm thấy {len(subs)} dòng.")
	                    srt_data = generate_srt_content(subs)
	                    st.download_button("📥 TẢI FILE SRT", srt_data, file_name="subtitle.srt", use_container_width=True)
	                else:
	                    st.error("❌ Vẫn không thấy chữ. Hãy thử giảm 'Độ nhạy' xuống 0.2")
	            except Exception as e:
	                # UI boundary: surface any failure to the user instead of crashing.
	                st.error(f"Lỗi: {e}")