"""Streamlit front end for the DotsOCR service.

The user supplies a PDF (upload or URL). The PDF is rasterised page by page
and each page is sent, on demand, to an OCR proxy exposing a chat-completions
style endpoint. Results are cached per page in ``st.session_state``.
"""
import base64
import json
import logging
import os
import re
import uuid
from io import BytesIO

import requests
import streamlit as st
from pdf2image import convert_from_bytes

logger = logging.getLogger(__name__)

st.set_page_config(page_title="DotsOCR", page_icon="🔍", layout="wide")

# === Session state initialisation (avoids errors from missing keys) ===
# The literals below are re-created on every script run, so the mutable
# defaults are never shared between sessions.
for _key, _default in (
    ("total_pages", 0),
    ("pdf_images", []),
    ("ocr_results", {}),   # page index -> OCR text or error message
    ("ocr_errors", set()),  # page indices whose stored text is an error
    ("current_file_hash", None),
    ("task_id", None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Configuration read from environment variables.
PROXY_BASE_URL = os.getenv("PROXY_URL", "")
MODEL_NAME = os.getenv("MODEL_NAME", "/model/DotsOCR")


def upload_pdf_to_proxy(task_id, file_bytes, filename):
    """Upload the original PDF to the proxy on a best-effort basis.

    Does nothing when ``PROXY_BASE_URL`` is not configured. Failures are
    logged and otherwise ignored so that a proxy outage never blocks the UI.

    Args:
        task_id: Identifier sent as the ``task_id`` form field.
        file_bytes: Raw PDF content.
        filename: File name reported in the multipart upload.

    Returns:
        None.
    """
    if not PROXY_BASE_URL:
        return
    url = f"{PROXY_BASE_URL}/upload_pdf"
    files = {'file': (filename, file_bytes, 'application/pdf')}
    data = {'task_id': task_id}
    # This header skips the ngrok browser-warning interstitial.
    headers = {"ngrok-skip-browser-warning": "true"}
    try:
        requests.post(url, files=files, data=data, headers=headers, timeout=60)
    except Exception:  # best-effort: never let the upload break the page
        logger.warning("Uploading PDF for task %s failed", task_id,
                       exc_info=True)


def image_to_base64(image):
    """Return *image* encoded as a base64 JPEG string.

    Args:
        image: A PIL image (it must provide ``mode``, ``convert`` and
            ``save``).

    Returns:
        The JPEG bytes of the image, base64-encoded and decoded as UTF-8.
    """
    # JPEG cannot store alpha or palette images; flatten them to RGB first.
    if image.mode in ("RGBA", "LA", "P"):
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')


def call_proxy_api(task_id, page_num, image):
    """Send one page image to the proxy and return the recognised text.

    Args:
        task_id: Identifier sent in the ``X-Task-ID`` header.
        page_num: 1-based page number sent in the ``X-Page-Num`` header.
        image: PIL image of the page.

    Returns:
        A ``(success, text)`` tuple. On success ``text`` is the message
        content returned by the proxy (possibly empty); on failure it is a
        human-readable error message.
    """
    b64_img = image_to_base64(image)
    system_prompt = "Parse the content of this image into markdown text."
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": system_prompt},
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/jpeg;base64,{b64_img}"}},
                ],
            }
        ],
        "max_tokens": 4096,
        "temperature": 0.1,
    }
    headers = {
        "X-Task-ID": task_id,
        "X-Page-Num": str(page_num),
        "ngrok-skip-browser-warning": "true",
    }
    url = f"{PROXY_BASE_URL}/v1/chat/completions"
    try:
        response = requests.post(
            url, json=payload, headers=headers, timeout=120)
    except Exception as e:  # UI boundary: report instead of crashing
        return False, f"Connection Error: {str(e)}"

    if response.status_code != 200:
        return False, f"Proxy Error {response.status_code}"

    try:
        res_json = response.json()
        # A missing or empty "choices" list yields empty content.
        choices = res_json.get('choices') or [{}]
        message = choices[0].get('message') or {}
        # "content" may be present but null; normalise to a string.
        content = message.get('content') or ""
    except (ValueError, AttributeError, IndexError, KeyError, TypeError) as e:
        return False, f"Proxy Error: invalid response ({e})"
    return True, content


def download_pdf_from_url(url):
    """Download a document from *url*.

    Google Drive ``/file/d/<id>`` share links are rewritten to the direct
    download form before fetching.

    Args:
        url: Address entered by the user.

    Returns:
        ``(content, None)`` on success, ``(None, error_message)`` on failure.
    """
    # NOTE(review): the URL is user-supplied and fetched server-side; consider
    # restricting reachable hosts if this app is exposed publicly.
    if "drive.google.com" in url and "/file/d/" in url:
        match = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
        if match:
            url = ("https://drive.google.com/uc?export=download"
                   f"&id={match.group(1)}")
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        resp = requests.get(url, headers=headers, timeout=30)
        resp.raise_for_status()
    except (requests.RequestException, ValueError) as e:
        return None, str(e)
    return resp.content, None


# === UI starts here ===
# NOTE(review): the original file had lost its line breaks; the nesting of
# the sidebar section below was reconstructed from the statement order.
st.title("🔍 DotsOCR Service")

with st.sidebar:
    st.header("Input Source")
    input_type = st.radio("Select:", ["Upload PDF", "PDF URL"])
    pdf_bytes = None
    file_name = "document.pdf"

    if input_type == "Upload PDF":
        uploaded_file = st.file_uploader("Choose file", type=["pdf"])
        if uploaded_file:
            # getvalue() is independent of the current stream position.
            pdf_bytes = uploaded_file.getvalue()
            file_name = uploaded_file.name
    else:
        url = st.text_input("URL:")
        if url and st.button("Fetch"):
            with st.spinner("Downloading..."):
                data, err = download_pdf_from_url(url)
                if data:
                    pdf_bytes = data
                elif err:
                    st.error(f"Download failed: {err}")
                else:
                    st.error("Download failed")

    if pdf_bytes:
        # Detect a new document by comparing content hashes.
        file_hash = hash(pdf_bytes)
        if st.session_state.current_file_hash != file_hash:
            st.session_state.ocr_results = {}
            st.session_state.ocr_errors = set()
            st.session_state.current_file_hash = file_hash
            st.session_state.task_id = str(uuid.uuid4())

            # 1. Upload the original file to the proxy.
            upload_pdf_to_proxy(
                st.session_state.task_id, pdf_bytes, file_name)

            # 2. Convert the PDF pages to images.
            with st.spinner("Initializing Document..."):
                try:
                    images = convert_from_bytes(pdf_bytes, dpi=150)
                except Exception:
                    logger.exception("PDF conversion failed")
                    # Roll back so the previous document's pages are not
                    # displayed under the new document on the next rerun.
                    st.session_state.current_file_hash = None
                    st.session_state.pdf_images = []
                    st.session_state.total_pages = 0
                    st.error("Failed to process PDF.")
                    st.stop()
                st.session_state.pdf_images = images
                st.session_state.total_pages = len(images)

    st.divider()

# total_pages is guaranteed to exist thanks to the initialisation above.
total_pages = st.session_state.total_pages
if total_pages > 0:
    # Page navigation; a slider needs a non-empty range, so single-page
    # documents skip it.
    if total_pages > 1:
        page_idx = st.slider("Page Navigation", 1, total_pages, 1) - 1
    else:
        page_idx = 0
    current_image = st.session_state.pdf_images[page_idx]

    col1, col2 = st.columns([1, 1])

    with col1:
        st.image(current_image, caption=f"Page {page_idx + 1}",
                 use_container_width=True)

        # Auto-run: a page without a stored result is processed immediately,
        # no button required.
        if page_idx not in st.session_state.ocr_results:
            with st.spinner(f"⚡ Auto-processing Page {page_idx + 1}..."):
                success, res = call_proxy_api(
                    st.session_state.task_id, page_idx + 1, current_image)
                # Store the text (content on success, message on failure) and
                # remember failures explicitly rather than guessing from text.
                st.session_state.ocr_results[page_idx] = res
                if success:
                    st.session_state.ocr_errors.discard(page_idx)
                else:
                    st.session_state.ocr_errors.add(page_idx)
                # Rerun so the result column picks up the new entry.
                st.rerun()

    with col2:
        # Result area.
        if page_idx in st.session_state.ocr_results:
            res_txt = st.session_state.ocr_results[page_idx]
            if page_idx in st.session_state.ocr_errors:
                st.error(f"❌ {res_txt}")
                # Offer a retry: dropping the entry triggers auto-processing.
                if st.button("🔄 Retry Page"):
                    del st.session_state.ocr_results[page_idx]
                    st.session_state.ocr_errors.discard(page_idx)
                    st.rerun()
            else:
                st.success("✅ Analysis Complete")
                st.markdown(res_txt)
                with st.expander("Show Raw Text"):
                    st.text_area("Raw", res_txt, height=200)
        else:
            st.info("Waiting for processor...")