# Free-OCR / src/streamlit_app.py
# Author: lianghsun
# Commit message: Update code
# Commit: 4bd743d
import streamlit as st
import os
import requests
import json
import base64
import uuid
import re
from pdf2image import convert_from_bytes
from io import BytesIO
st.set_page_config(page_title="DotsOCR", page_icon="🔍", layout="wide")

# === Session state initialisation ===
# Seed every key the script reads later so that a first run (or a rerun before
# any document was loaded) never hits a missing session-state entry.
# The mapping is rebuilt on each script run, so the mutable defaults are fresh.
_session_defaults = {
    "total_pages": 0,           # number of rasterized pages of the current PDF
    "pdf_images": [],           # one image per page, in page order
    "ocr_results": {},          # page index -> stored OCR result
    "current_file_hash": None,  # hash of the PDF bytes currently loaded
    "task_id": None,            # id sent to the proxy for the current PDF
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# Configuration read from environment variables.
# PROXY_URL:  base URL of the OCR proxy. Surrounding whitespace and trailing
#             slashes are removed because endpoints are built as
#             f"{PROXY_BASE_URL}/path"; a trailing slash would yield "//path".
#             An empty value means "no proxy configured".
# MODEL_NAME: model identifier sent in the chat-completions payload.
PROXY_BASE_URL = os.getenv("PROXY_URL", "").strip().rstrip("/")
MODEL_NAME = os.getenv("MODEL_NAME", "/model/DotsOCR")
def upload_pdf_to_proxy(task_id, file_bytes, filename):
    """Best-effort upload of the original PDF to the proxy.

    Does nothing when no proxy URL is configured. Network and HTTP failures
    are logged and otherwise ignored, so a failed upload never blocks the
    rest of the app.

    Args:
        task_id: Identifier sent as the ``task_id`` form field.
        file_bytes: Raw PDF content.
        filename: File name reported in the multipart upload.

    Returns:
        None.
    """
    import logging  # function-scope import keeps this block self-contained

    if not PROXY_BASE_URL:
        return

    url = f"{PROXY_BASE_URL}/upload_pdf"
    files = {"file": (filename, file_bytes, "application/pdf")}
    data = {"task_id": task_id}
    # Extra header to skip the ngrok browser-warning interstitial.
    headers = {"ngrok-skip-browser-warning": "true"}
    try:
        response = requests.post(
            url, files=files, data=data, headers=headers, timeout=60)
        # Surface 4xx/5xx answers in the log instead of silently ignoring them.
        response.raise_for_status()
    except requests.RequestException as exc:
        logging.getLogger(__name__).warning(
            "PDF upload to proxy failed for task %s: %s", task_id, exc)
def image_to_base64(image):
    """Encode an image as a base64 string of its JPEG representation.

    Args:
        image: A PIL-style image exposing ``mode``, ``convert`` and ``save``.

    Returns:
        The JPEG bytes, base64-encoded and decoded to a ``str``.
    """
    # JPEG cannot store alpha or palette data; saving such modes (RGBA, LA,
    # P, ...) raises, so anything outside the writable modes goes to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if getattr(image, "mode", "RGB") not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
def call_proxy_api(task_id, page_num, image):
    """Send one page image to the proxy's chat-completions endpoint for OCR.

    Args:
        task_id: Identifier forwarded in the ``X-Task-ID`` header.
        page_num: Page number forwarded (as text) in the ``X-Page-Num`` header.
        image: Page image; encoded as a base64 JPEG data URL.

    Returns:
        ``(True, text)`` on success, where ``text`` is the model output
        (possibly empty), or ``(False, message)`` on failure. Failure messages
        start with ``"Proxy Error"`` (bad status or malformed reply) or
        ``"Connection Error"`` (request could not be completed).
    """
    b64_img = image_to_base64(image)
    system_prompt = "Parse the content of this image into markdown text."
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": system_prompt},
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/jpeg;base64,{b64_img}"}}
                ]
            }
        ],
        "max_tokens": 4096,
        "temperature": 0.1
    }
    headers = {
        "X-Task-ID": task_id,
        "X-Page-Num": str(page_num),
        "ngrok-skip-browser-warning": "true"
    }
    try:
        url = f"{PROXY_BASE_URL}/v1/chat/completions"
        response = requests.post(
            url, json=payload, headers=headers, timeout=120)
        if response.status_code != 200:
            return False, f"Proxy Error {response.status_code}"

        try:
            res_json = response.json()
        except ValueError:
            # A 200 answer whose body is not JSON is a proxy problem, not a
            # connectivity problem.
            return False, "Proxy Error: response is not valid JSON"

        choices = res_json.get("choices") if isinstance(res_json, dict) else None
        if not choices:
            # Missing or empty "choices" used to raise IndexError and was
            # reported as a connection error.
            return False, "Proxy Error: response contains no choices"

        message = choices[0].get("message") or {}
        # Normalise a null/missing content to "" so callers always get a str.
        return True, message.get("content") or ""
    except Exception as e:
        # Boundary catch kept from the original contract: never raise to the UI.
        return False, f"Connection Error: {str(e)}"
def download_pdf_from_url(url):
    """Download the document at ``url`` and return ``(content, error)``.

    Google Drive "file/d/<id>" share links are rewritten to the direct
    download endpoint before fetching.

    Returns:
        ``(bytes, None)`` when the request succeeds, otherwise
        ``(None, message)`` where ``message`` is the text of the exception.
    """
    try:
        looks_like_drive = "drive.google.com" in url and "/file/d/" in url
        drive_id = (
            re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
            if looks_like_drive else None
        )
        if drive_id:
            url = f"https://drive.google.com/uc?export=download&id={drive_id.group(1)}"

        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            stream=True,
            timeout=30,
        )
        response.raise_for_status()
        body = response.content
    except Exception as exc:
        return None, str(exc)
    return body, None
# === UI start ===
st.title("🔍 DotsOCR Service")

# Sidebar: pick the input source and load the raw PDF bytes.
# pdf_bytes stays None on reruns where no document was provided.
with st.sidebar:
    st.header("Input Source")
    input_type = st.radio("Select:", ["Upload PDF", "PDF URL"])
    pdf_bytes = None
    file_name = "document.pdf"
    if input_type == "Upload PDF":
        uploaded_file = st.file_uploader("Choose file", type=["pdf"])
        if uploaded_file is not None:
            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read().
            pdf_bytes = uploaded_file.getvalue()
            file_name = uploaded_file.name
    else:
        url = st.text_input("URL:")
        # The button is only True on the run in which it was clicked.
        if url and st.button("Fetch"):
            with st.spinner("Downloading..."):
                data, err = download_pdf_from_url(url)
            if data:
                pdf_bytes = data
            else:
                # Show the reason; an empty body arrives with no error text.
                st.error(f"Download failed: {err or 'empty response'}")
if pdf_bytes:
    # A document counts as new when the hash of its bytes differs from the
    # one recorded for the document currently held in session state.
    file_hash = hash(pdf_bytes)
    if st.session_state.current_file_hash != file_hash:
        # Drop everything belonging to the previous document up front, so a
        # failed conversion below can never leave its pages on screen.
        st.session_state.ocr_results = {}
        st.session_state.pdf_images = []
        st.session_state.total_pages = 0
        st.session_state.current_file_hash = file_hash
        st.session_state.task_id = str(uuid.uuid4())

        # 1. Upload the original file to the proxy (best effort).
        upload_pdf_to_proxy(st.session_state.task_id, pdf_bytes, file_name)

        # 2. Rasterize the PDF into one image per page.
        with st.spinner("Initializing Document..."):
            try:
                images = convert_from_bytes(pdf_bytes, dpi=150)
            except Exception:
                st.error("Failed to process PDF.")
                st.stop()
            else:
                st.session_state.pdf_images = images
                st.session_state.total_pages = len(images)
st.divider()

# Viewer: rendered only once a document has been rasterized into pages.
# total_pages always exists thanks to the session-state defaults.
if st.session_state.total_pages > 0:
    # Page slider: 1-based for the user, page_idx is the 0-based list index.
    # NOTE(review): st.slider is expected to reject min_value == max_value,
    # so a single-page document skips the slider - confirm against the
    # Streamlit version in use.
    if st.session_state.total_pages > 1:
        page_idx = st.slider("Page Navigation", 1,
                             st.session_state.total_pages, 1) - 1
    else:
        page_idx = 0
    current_image = st.session_state.pdf_images[page_idx]

    col1, col2 = st.columns([1, 1])
    with col1:
        st.image(current_image,
                 caption=f"Page {page_idx + 1}", use_container_width=True)

    # === Auto-run ===
    # If this page has no stored result yet, process it right away (no button).
    if page_idx not in st.session_state.ocr_results:
        with st.spinner(f"⚡ Auto-processing Page {page_idx + 1}..."):
            success, res = call_proxy_api(
                st.session_state.task_id, page_idx + 1, current_image)
        # Keep the success flag next to the text: deciding success by
        # searching the text for "Proxy Error" misclassifies OCR output that
        # happens to contain those words.
        st.session_state.ocr_results[page_idx] = (success, res)
        # Rerun so the stored result is rendered below.
        st.rerun()

    with col2:
        # Result area.
        if page_idx in st.session_state.ocr_results:
            succeeded, res_txt = st.session_state.ocr_results[page_idx]
            if not succeeded:
                st.error(f"❌ {res_txt}")
                # Retry: forget the stored failure so the auto-run fires again.
                if st.button("🔄 Retry Page"):
                    del st.session_state.ocr_results[page_idx]
                    st.rerun()
            else:
                st.success("✅ Analysis Complete")
                st.markdown(res_txt)
                with st.expander("Show Raw Text"):
                    st.text_area("Raw", res_txt, height=200)
        else:
            st.info("Waiting for processor...")