OCR-Sub

Sleeping

App Files Files Community

hoanglinhn0 committed on Feb 4

Commit

01dfd90

verified ·

1 Parent(s): 5aa6225

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -73

app.py CHANGED Viewed

@@ -9,15 +9,15 @@ import numpy as np
 import streamlit as st
 from rapidocr_onnxruntime import RapidOCR
-# 1. CẤU HÌNH TRANG (Giao diện mobile gọn gàng)
-st.set_page_config(page_title="OCR Android Mobile", layout="centered")
 # --- CACHE MODEL ---
 @st.cache_resource
 def load_ocr_model():
     """Create the RapidOCR engine.

     The ``st.cache_resource`` decorator lets Streamlit hold on to the
     returned engine instead of rebuilding it on every call.
     """
     engine = RapidOCR()
     return engine
-# --- CÁC HÀM HỖ TRỢ ---
 def similar(a, b):
     """Return the difflib similarity ratio of ``a`` and ``b`` (0.0 to 1.0)."""
     # No junk filter (first argument None): every element takes part in matching.
     matcher = SequenceMatcher(None, a, b)
     return matcher.ratio()
@@ -37,25 +37,18 @@ def get_video_info(video_path):
     cap.release()
     return width, height, fps, total_frames
-# --- ENGINE XỬ LÝ (GIỮ NGUYÊN THUẬT TOÁN TỐI ƯU) ---
-def extract_subtitles(video_path, ocr_engine, crop_ratio, frame_skip, conf_thresh, progress_bar, status_text):
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    orig_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    orig_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    y_start = int(orig_h * (1 - crop_ratio))
     subs = []
     current_sub = None
-    # Tự động resize nếu video 4K
-    if orig_w > 2000:
-        resize_scale = 1920 / orig_w
-    else:
-        resize_scale = 1.0
-    prev_roi_enhanced = None
     last_text = ""
     frame_idx = 0
     pbar_cnt = 0
@@ -64,6 +57,7 @@ def extract_subtitles(video_path, ocr_engine, crop_ratio, frame_skip, conf_thres
         ret, frame = cap.read()
         if not ret: break
         if frame_idx % frame_skip != 0:
             frame_idx += 1
             continue
@@ -72,58 +66,78 @@ def extract_subtitles(video_path, ocr_engine, crop_ratio, frame_skip, conf_thres
         if pbar_cnt % 20 == 0:
             prog = min(frame_idx / total_frames, 1.0)
             progress_bar.progress(prog)
-            status_text.text(f"⏳ Đang chạy... {int(prog*100)}%")
-        roi = frame[y_start:orig_h, :]
-        if resize_scale < 1.0:
-            roi = cv2.resize(roi, (0, 0), fx=resize_scale, fy=resize_scale)
-        roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
-        roi_enhanced = cv2.normalize(roi_gray, None, 0, 255, cv2.NORM_MINMAX)
         should_run_ocr = True
-        if prev_roi_enhanced is not None:
-            try:
-                diff = cv2.absdiff(roi_enhanced, prev_roi_enhanced)
-                non_zero_count = np.count_nonzero(diff > 30)
-                if non_zero_count / roi_enhanced.size < 0.05:
-                    should_run_ocr = False
-                    text = last_text
-            except: pass
-        prev_roi_enhanced = roi_enhanced
         if should_run_ocr:
-            res, _ = ocr_engine(roi)
             text = " ".join([line[1] for line in res if float(line[2]) >= conf_thresh]).strip() if res else ""
-            last_text = text
         timestamp = frame_idx / fps
         if text:
             if current_sub is None:
                 current_sub = {'start': timestamp, 'end': timestamp, 'text': text}
             else:
-                if similar(text, current_sub['text']) > 0.75:
                     current_sub['end'] = timestamp
-                    if len(text) > len(current_sub['text']): current_sub['text'] = text
                 else:
-                    if current_sub['end'] - current_sub['start'] > 0.1: subs.append(current_sub)
                     current_sub = {'start': timestamp, 'end': timestamp, 'text': text}
         else:
             if current_sub:
-                if current_sub['end'] - current_sub['start'] > 0.1: subs.append(current_sub)
                 current_sub = None
         frame_idx += 1
-    if current_sub and (current_sub['end'] - current_sub['start'] > 0.1): subs.append(current_sub)
     cap.release()
     final_subs = []
     for i, s in enumerate(subs):
-        final_subs.append({"index": i + 1, "start": format_timestamp(s['start']), "end": format_timestamp(s['end']), "text": s['text']})
     return final_subs
 def generate_srt_content(subs):
@@ -132,37 +146,36 @@ def generate_srt_content(subs):
         srt_content += f"{sub['index']}\n{sub['start']} --> {sub['end']}\n{sub['text']}\n\n"
     return srt_content
-# --- GIAO DIỆN ANDROID TỐI GIẢN ---
-st.markdown("### 📱 Video OCR (Android Mode)")
-# Upload file
 uploaded_file = st.file_uploader("Chọn Video:", type=["mp4", "avi", "mkv"])
 if uploaded_file is not None:
-    # Chunking save
     tfile = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
     chunk_size = 10 * 1024 * 1024
-    with st.spinner("Đang tải video..."):
         while True:
             chunk = uploaded_file.read(chunk_size)
             if not chunk: break
             tfile.write(chunk)
-    tfile.close()
     video_path = tfile.name
     try:
         width, height, fps, total_frames = get_video_info(video_path)
-    except:
-        width = None
     if width:
-        # --- PHẦN ĐIỀU CHỈNH VÙNG QUÉT (DỄ DÙNG CHO MOBILE) ---
         st.write("---")
-        st.write("### 1. Chỉnh Vạch Đỏ (Vùng quét)")
-        # Xem trước ảnh
-        preview_frame_idx = int(total_frames * 0.2) # Mặc định lấy frame ở 20% video
         cap = cv2.VideoCapture(video_path)
         cap.set(cv2.CAP_PROP_POS_FRAMES, preview_frame_idx)
         ret, frame = cap.read()
@@ -171,45 +184,54 @@ if uploaded_file is not None:
         if "crop_val" not in st.session_state:
             st.session_state.crop_val = 0.30
-        # Cột chia đôi để nút bấm to hơn
         c1, c2 = st.columns([1, 1])
         with c1:
-            # Thay Slider bằng Number Input (Có nút + - dễ bấm trên Android)
-            crop_ratio = st.number_input("Cao độ vạch đỏ (0.1 - 0.5)",
                                          min_value=0.1, max_value=0.6,
                                          value=st.session_state.crop_val,
-                                         step=0.01, # Bước nhảy nhỏ để chỉnh tinh
-                                         format="%.2f")
         with c2:
-             st.info("💡 Bấm dấu (+) (-) để nhích vạch đỏ lên xuống.")
-        # Hiển thị ảnh ngay bên dưới nút chỉnh
         if ret:
-            # Resize để vừa màn hình điện thoại
-            display_scale = 400 / width if width > 400 else 1.0 # 400px là vừa ngang đt
             small_h = int(height * display_scale)
             preview_small = cv2.resize(frame, (int(width*display_scale), small_h))
             line_y = int(small_h * (1 - crop_ratio))
             cv2.line(preview_small, (0, line_y), (preview_small.shape[1], line_y), (0, 0, 255), 2)
-            st.image(preview_small, channels="BGR", caption="Vạch đỏ phải nằm TRÊN đầu chữ một chút")
         st.write("---")
-        st.write("### 2. Cấu hình & Chạy")
-        with st.expander("⚙️ Cài đặt nâng cao (Bấm để mở)"):
-            frame_skip = st.selectbox("Tốc độ quét:", [5, 10, 15], index=1, help="Số lớn chạy nhanh hơn.")
-            conf_thresh = st.slider("Độ nhạy (Tin cậy):", 0.1, 1.0, 0.40)
-        # Nút chạy to rõ
-        if st.button("🚀 BẮT ĐẦU QUÉT NGAY", type="primary", use_container_width=True):
             try:
                 ocr_engine = load_ocr_model()
                 prog_bar = st.progress(0)
                 status_txt = st.empty()
-                subs = extract_subtitles(video_path, ocr_engine, crop_ratio, frame_skip, conf_thresh, prog_bar, status_txt)
                 prog_bar.progress(100)
@@ -218,6 +240,6 @@ if uploaded_file is not None:
                     srt_data = generate_srt_content(subs)
                     st.download_button("📥 TẢI FILE SRT", srt_data, file_name="subtitle.srt", use_container_width=True)
                 else:
-                    st.error("❌ Không thấy chữ! Hãy nhích vạch đỏ thấp xuống hoặc cao lên chút nữa.")
             except Exception as e:
                 st.error(f"Lỗi: {e}")

 import streamlit as st
 from rapidocr_onnxruntime import RapidOCR
+# 1. CẤU HÌNH TRANG MOBILE
+st.set_page_config(page_title="OCR Android: Chậm & Chắc", layout="centered")
 # --- CACHE MODEL ---
 @st.cache_resource
 def load_ocr_model():
     """Create the RapidOCR engine.

     The ``st.cache_resource`` decorator lets Streamlit hold on to the
     returned engine instead of rebuilding it on every call.
     """
     engine = RapidOCR()
     return engine
+# --- HÀM HỖ TRỢ ---
 def similar(a, b):
     """Return the difflib similarity ratio of ``a`` and ``b`` (0.0 to 1.0)."""
     # No junk filter (first argument None): every element takes part in matching.
     matcher = SequenceMatcher(None, a, b)
     return matcher.ratio()
     cap.release()
     return width, height, fps, total_frames
+# --- ENGINE XỬ LÝ (ĐÃ TINH CHỈNH ĐỂ BẮT DÍNH MỌI CHỮ) ---
+def extract_subtitles(video_path, ocr_engine, crop_ratio, frame_skip, conf_thresh, use_smart_filter, progress_bar, status_text):
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    y_start = int(height * (1 - crop_ratio))
     subs = []
     current_sub = None
+    prev_roi_gray = None
     last_text = ""
     frame_idx = 0
     pbar_cnt = 0
         ret, frame = cap.read()
         if not ret: break
+        # Nhảy frame (Skip)
         if frame_idx % frame_skip != 0:
             frame_idx += 1
             continue
         if pbar_cnt % 20 == 0:
             prog = min(frame_idx / total_frames, 1.0)
             progress_bar.progress(prog)
+            # Hiển thị giây hiện tại để biết máy đang chạy đến đâu
+            current_sec = int(frame_idx/fps)
+            status_text.text(f"🔍 Đang soi kỹ... {int(prog*100)}% (Giây thứ: {current_sec})")
+        # 1. Cắt vùng sub
+        roi = frame[y_start:height, :]
+        # 2. Xử lý ảnh (Smart Filter)
+        # Nếu bật chế độ này, máy sẽ so sánh với frame trước để bỏ qua nếu giống nhau
+        # Nếu tắt (False), máy sẽ OCR tất cả các frame -> Chậm nhưng KHÔNG SÓT CHỮ
         should_run_ocr = True
+        if use_smart_filter:
+            roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+            if prev_roi_gray is not None:
+                try:
+                    score = cv2.absdiff(roi_gray, prev_roi_gray)
+                    non_zero = np.count_nonzero(score > 30)
+                    if non_zero / roi_gray.size < 0.03: # Nếu thay đổi < 3%
+                        should_run_ocr = False
+                        text = last_text
+                except: pass
+            prev_roi_gray = roi_gray
+        # 3. Chạy OCR
         if should_run_ocr:
+            res, _ = ocr_engine(roi)
+            # Lọc tin cậy: Chỉ lấy chữ rõ
             text = " ".join([line[1] for line in res if float(line[2]) >= conf_thresh]).strip() if res else ""
+            last_text = text
         timestamp = frame_idx / fps
+        # 4. Logic gộp sub (Đã nới lỏng để bắt nhạy hơn)
         if text:
             if current_sub is None:
                 current_sub = {'start': timestamp, 'end': timestamp, 'text': text}
             else:
+                # Nếu giống > 70% thì gộp (Giảm từ 75 xuống 70 để đỡ bị cắt vụn)
+                if similar(text, current_sub['text']) > 0.70:
                     current_sub['end'] = timestamp
+                    # Luôn ưu tiên lấy câu dài hơn
+                    if len(text) > len(current_sub['text']):
+                        current_sub['text'] = text
                 else:
+                    # Lưu câu cũ
+                    if current_sub['end'] - current_sub['start'] > 0.1:
+                        subs.append(current_sub)
                     current_sub = {'start': timestamp, 'end': timestamp, 'text': text}
         else:
+            # Khoảng trống
             if current_sub:
+                if current_sub['end'] - current_sub['start'] > 0.1:
+                    subs.append(current_sub)
                 current_sub = None
         frame_idx += 1
+    if current_sub and (current_sub['end'] - current_sub['start'] > 0.1):
+        subs.append(current_sub)
     cap.release()
+    # Format kết quả
     final_subs = []
     for i, s in enumerate(subs):
+        final_subs.append({
+            "index": i + 1,
+            "start": format_timestamp(s['start']),
+            "end": format_timestamp(s['end']),
+            "text": s['text']
+        })
     return final_subs
 def generate_srt_content(subs):
         srt_content += f"{sub['index']}\n{sub['start']} --> {sub['end']}\n{sub['text']}\n\n"
     return srt_content
+# --- GIAO DIỆN ANDROID ---
+st.markdown("### 📱 Video OCR (Chậm mà Chắc)")
 uploaded_file = st.file_uploader("Chọn Video:", type=["mp4", "avi", "mkv"])
 if uploaded_file is not None:
+    # Lưu file tạm
     tfile = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
     chunk_size = 10 * 1024 * 1024
+    with st.status("Đang chuẩn bị...", expanded=True) as status:
         while True:
             chunk = uploaded_file.read(chunk_size)
             if not chunk: break
             tfile.write(chunk)
+        tfile.close()
+        status.update(label="Đã xong!", state="complete", expanded=False)
     video_path = tfile.name
     try:
         width, height, fps, total_frames = get_video_info(video_path)
+    except: width = None
     if width:
         st.write("---")
+        st.write("#### 1. Chỉnh Vạch Đỏ (Quan trọng nhất)")
+        # Preview frame
+        preview_frame_idx = int(total_frames * 0.2)
         cap = cv2.VideoCapture(video_path)
         cap.set(cv2.CAP_PROP_POS_FRAMES, preview_frame_idx)
         ret, frame = cap.read()
         if "crop_val" not in st.session_state:
             st.session_state.crop_val = 0.30
+        # Giao diện nút bấm +/-
         c1, c2 = st.columns([1, 1])
         with c1:
+            crop_ratio = st.number_input("Vị trí vạch đỏ:",
                                          min_value=0.1, max_value=0.6,
                                          value=st.session_state.crop_val,
+                                         step=0.01, format="%.2f")
         with c2:
+             st.info("Bấm (+) (-) để chỉnh. Vạch đỏ phải nằm **NGAY TRÊN ĐẦU** dòng chữ.")
         if ret:
+            # Resize ảnh preview cho vừa điện thoại
+            display_scale = 400 / width if width > 400 else 1.0
             small_h = int(height * display_scale)
             preview_small = cv2.resize(frame, (int(width*display_scale), small_h))
             line_y = int(small_h * (1 - crop_ratio))
             cv2.line(preview_small, (0, line_y), (preview_small.shape[1], line_y), (0, 0, 255), 2)
+            st.image(preview_small, channels="BGR", caption="Ảnh xem trước")
         st.write("---")
+        st.write("#### 2. Cấu hình quét")
+        # --- CẤU HÌNH MỚI CHO NGƯỜI DÙNG BỊ MẤT CHỮ ---
+        c3, c4 = st.columns([1, 1])
+        with c3:
+            # Cho phép chọn tốc độ chậm hơn (2 hoặc 3) để không sót
+            frame_skip = st.selectbox("Tốc độ (Skip):", [2, 3, 5, 10], index=1,
+                                      help="Chọn 2 hoặc 3 để quét kỹ từng chút (Lâu hơn nhưng ra đủ chữ).")
+        with c4:
+            # Mặc định để thấp (0.3) để chữ mờ cũng bắt được
+            conf_thresh = st.number_input("Độ nhạy (0.1-1.0):", value=0.3, step=0.1)
+        # Thêm nút tắt bộ lọc thông minh
+        use_smart_filter = st.checkbox("⚡ Dùng bộ lọc tăng tốc (Tắt nếu bị mất chữ)", value=False)
+        if not use_smart_filter:
+            st.caption("🐢 Đang tắt bộ lọc: Máy sẽ quét kỹ từng khung hình (Sẽ lâu hơn nhưng chính xác nhất).")
+        # Nút chạy
+        if st.button("🚀 BẮT ĐẦU QUÉT", type="primary", use_container_width=True):
             try:
                 ocr_engine = load_ocr_model()
                 prog_bar = st.progress(0)
                 status_txt = st.empty()
+                # Gọi hàm với tham số mới
+                subs = extract_subtitles(video_path, ocr_engine, crop_ratio, frame_skip, conf_thresh, use_smart_filter, prog_bar, status_txt)
                 prog_bar.progress(100)
                     srt_data = generate_srt_content(subs)
                     st.download_button("📥 TẢI FILE SRT", srt_data, file_name="subtitle.srt", use_container_width=True)
                 else:
+                    st.error("❌ Vẫn không thấy chữ. Hãy thử giảm 'Độ nhạy' xuống 0.2")
             except Exception as e:
                 st.error(f"Lỗi: {e}")