Spaces:

lianghsun
/

Free-OCR

Sleeping

App Files Files Community

Free-OCR / src /streamlit_app.py

lianghsun

Update code

4bd743d 2 months ago

raw

history blame contribute delete

6.77 kB

	import base64
	import json
	import logging
	import os
	import re
	import uuid
	from io import BytesIO

	import requests
	import streamlit as st
	from pdf2image import convert_from_bytes

	st.set_page_config(page_title="DotsOCR", page_icon="🔍", layout="wide")

	# === Session state initialisation ===
	# Seed every key the script reads later so a first run (or a fresh
	# session) never hits a missing attribute. Existing values are left
	# untouched, which is what keeps results alive across Streamlit reruns.
	for _state_key, _state_default in (
	    ("total_pages", 0),
	    ("pdf_images", []),
	    ("ocr_results", {}),
	    ("current_file_hash", None),
	    ("task_id", None),
	):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default

	# Configuration read from environment variables.
	# PROXY_URL: base URL of the OCR proxy. Surrounding whitespace and any
	# trailing slash are removed so that f"{PROXY_BASE_URL}/path" never yields
	# a double slash. An empty string means "no proxy configured".
	PROXY_BASE_URL = os.getenv("PROXY_URL", "").strip().rstrip("/")
	# MODEL_NAME: model identifier sent in the chat-completions payload.
	MODEL_NAME = os.getenv("MODEL_NAME", "/model/DotsOCR")


	def upload_pdf_to_proxy(task_id, file_bytes, filename):
	    """Best-effort upload of the original PDF to the proxy.

	    Posts the file to ``{PROXY_BASE_URL}/upload_pdf`` together with the
	    task id. The upload is optional: when no proxy URL is configured the
	    function returns immediately, and transport failures are logged
	    instead of raised so the OCR flow can continue.

	    Args:
	        task_id: Identifier sent as the ``task_id`` form field.
	        file_bytes: Raw PDF content.
	        filename: File name reported in the multipart upload.

	    Returns:
	        None.
	    """
	    if not PROXY_BASE_URL:
	        return

	    url = f"{PROXY_BASE_URL}/upload_pdf"
	    files = {'file': (filename, file_bytes, 'application/pdf')}
	    data = {'task_id': task_id}
	    # Header that skips the ngrok browser-warning interstitial.
	    headers = {"ngrok-skip-browser-warning": "true"}

	    log = logging.getLogger(__name__)
	    try:
	        response = requests.post(
	            url, files=files, data=data, headers=headers, timeout=60)
	    except requests.RequestException as exc:
	        # Deliberately non-fatal, but no longer silent.
	        log.warning("PDF upload to proxy failed: %s", exc)
	        return

	    if response.status_code >= 400:
	        log.warning(
	            "PDF upload to proxy returned HTTP %s", response.status_code)


	def image_to_base64(image):
	    """Encode *image* as JPEG and return it as a base64 string.

	    Args:
	        image: An image object exposing ``save(fp, format=...)`` and,
	            optionally, ``mode`` / ``convert(mode)`` (PIL-style).

	    Returns:
	        The JPEG bytes, base64-encoded and decoded to a ``str``.
	    """
	    # JPEG cannot store alpha or palette data; writing such an image fails,
	    # so anything outside the JPEG-writable modes is converted to RGB first.
	    if getattr(image, "mode", "RGB") not in ("L", "RGB", "CMYK"):
	        image = image.convert("RGB")

	    buffered = BytesIO()
	    image.save(buffered, format="JPEG")
	    return base64.b64encode(buffered.getvalue()).decode('utf-8')


	def call_proxy_api(task_id, page_num, image):
	    """Send one page image to the proxy and return the OCR text.

	    The image is JPEG/base64-encoded and posted to
	    ``{PROXY_BASE_URL}/v1/chat/completions`` as a single user message
	    holding the instruction text and the image data URL.

	    Args:
	        task_id: Value sent in the ``X-Task-ID`` header.
	        page_num: Page number sent (as a string) in ``X-Page-Num``.
	        image: Page image accepted by ``image_to_base64``.

	    Returns:
	        ``(True, text)`` on success, where ``text`` is always a ``str``;
	        ``(False, message)`` on failure. Failure messages start with
	        ``"Proxy Error"`` (bad status or malformed body) or
	        ``"Connection Error"`` (transport failure).
	    """
	    b64_img = image_to_base64(image)
	    prompt = "Parse the content of this image into markdown text."

	    payload = {
	        "model": MODEL_NAME,
	        "messages": [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt},
	                    {"type": "image_url", "image_url": {
	                        "url": f"data:image/jpeg;base64,{b64_img}"}}
	                ]
	            }
	        ],
	        "max_tokens": 4096,
	        "temperature": 0.1
	    }

	    headers = {
	        "X-Task-ID": task_id,
	        "X-Page-Num": str(page_num),
	        "ngrok-skip-browser-warning": "true"
	    }

	    url = f"{PROXY_BASE_URL}/v1/chat/completions"
	    try:
	        response = requests.post(
	            url, json=payload, headers=headers, timeout=120)
	    except requests.RequestException as e:
	        return False, f"Connection Error: {str(e)}"

	    if response.status_code != 200:
	        return False, f"Proxy Error {response.status_code}"

	    # Parse separately from the request so a malformed body is reported
	    # as a proxy problem rather than as a connection failure.
	    try:
	        res_json = response.json()
	        # Missing or empty "choices" is treated as an empty result.
	        choices = res_json.get('choices') or [{}]
	        content = choices[0].get('message', {}).get('content', "")
	    except (ValueError, AttributeError, IndexError, KeyError, TypeError) as e:
	        return False, f"Proxy Error: malformed response ({e})"

	    # Callers run string operations on the result; never hand back None.
	    if content is None:
	        content = ""
	    elif not isinstance(content, str):
	        content = str(content)
	    return True, content


	def download_pdf_from_url(url):
	    """Download a PDF from *url*.

	    Google Drive share links of the form ``.../file/d/<id>/...`` are
	    rewritten to the direct-download endpoint before fetching.

	    Args:
	        url: Address of the PDF.

	    Returns:
	        ``(content, None)`` with the PDF bytes on success, or
	        ``(None, message)`` describing the failure.
	    """
	    # NOTE(review): url is user-supplied and fetched server-side; consider
	    # restricting target hosts if this runs next to internal services.
	    if not url or not isinstance(url, str):
	        return None, "No URL provided"

	    if "drive.google.com" in url and "/file/d/" in url:
	        match = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
	        if match:
	            url = f"https://drive.google.com/uc?export=download&id={match.group(1)}"

	    headers = {"User-Agent": "Mozilla/5.0"}
	    try:
	        resp = requests.get(url, headers=headers, timeout=30)
	        resp.raise_for_status()
	    except requests.RequestException as e:
	        return None, str(e)

	    content = resp.content
	    if not content:
	        return None, "Empty response body"
	    # A PDF carries its "%PDF" header near the start of the file; an HTML
	    # page (login wall, Drive confirmation page) does not.
	    if b"%PDF" not in content[:1024]:
	        return None, "Response is not a PDF document"
	    return content, None


	# === UI starts here ===
	st.title("🔍 DotsOCR Service")

	with st.sidebar:
	    st.header("Input Source")
	    input_type = st.radio("Select:", ["Upload PDF", "PDF URL"])
	    pdf_bytes = None
	    file_name = "document.pdf"

	    if input_type == "Upload PDF":
	        uploaded_file = st.file_uploader("Choose file", type=["pdf"])
	        if uploaded_file:
	            pdf_bytes = uploaded_file.read()
	            file_name = uploaded_file.name
	    else:
	        url = st.text_input("URL:")
	        if url and st.button("Fetch"):
	            with st.spinner("Downloading..."):
	                data, err = download_pdf_from_url(url)
	            if data:
	                pdf_bytes = data
	            else:
	                # Surface the reason instead of discarding it.
	                st.error(f"Download failed: {err}" if err else "Download failed")

	    if pdf_bytes:
	        # Detect a new document by comparing content hashes.
	        file_hash = hash(pdf_bytes)
	        if st.session_state.current_file_hash != file_hash:
	            st.session_state.ocr_results = {}
	            st.session_state.current_file_hash = file_hash
	            st.session_state.task_id = str(uuid.uuid4())

	            # 1. Send the original file to the proxy (best effort).
	            upload_pdf_to_proxy(st.session_state.task_id, pdf_bytes, file_name)

	            # 2. Rasterise the PDF into one image per page.
	            with st.spinner("Initializing Document..."):
	                try:
	                    images = convert_from_bytes(pdf_bytes, dpi=150)
	                except Exception:
	                    # Drop all document state so pages from a previous
	                    # file are not shown under the new task id, and so
	                    # re-submitting the same file retries the conversion.
	                    st.session_state.pdf_images = []
	                    st.session_state.total_pages = 0
	                    st.session_state.current_file_hash = None
	                    st.error("Failed to process PDF.")
	                    st.stop()
	                st.session_state.pdf_images = images
	                st.session_state.total_pages = len(images)

	st.divider()

	# total_pages is guaranteed to exist thanks to the session-state defaults.
	if st.session_state.total_pages > 0:
	    total_pages = st.session_state.total_pages

	    # Page selector. A slider needs min < max, so a single-page document
	    # skips the widget and shows page 1 directly.
	    if total_pages > 1:
	        page_idx = st.slider("Page Navigation", 1, total_pages, 1) - 1
	    else:
	        page_idx = 0
	    current_image = st.session_state.pdf_images[page_idx]

	    col1, col2 = st.columns([1, 1])

	    with col1:
	        st.image(current_image,
	                 caption=f"Page {page_idx + 1}", use_container_width=True)

	        # === Auto-run logic ===
	        # If this page has no stored result yet, process it immediately;
	        # no button press is required.
	        if page_idx not in st.session_state.ocr_results:
	            with st.spinner(f"⚡ Auto-processing Page {page_idx + 1}..."):
	                success, res = call_proxy_api(
	                    st.session_state.task_id, page_idx + 1, current_image)

	            # Keep the success flag next to the text so the display code
	            # does not have to guess from the text whether it is an error.
	            st.session_state.ocr_results[page_idx] = {
	                "ok": bool(success),
	                "text": "" if res is None else str(res),
	            }

	            # Rerun so the freshly stored result is rendered.
	            st.rerun()

	    with col2:
	        # Result area.
	        result = st.session_state.ocr_results.get(page_idx)
	        if result is not None:
	            res_txt = result["text"]

	            if not result["ok"]:
	                st.error(f"❌ {res_txt}")
	                # Offer a retry: dropping the entry re-triggers auto-run.
	                if st.button("🔄 Retry Page"):
	                    del st.session_state.ocr_results[page_idx]
	                    st.rerun()
	            else:
	                st.success("✅ Analysis Complete")
	                st.markdown(res_txt)
	                with st.expander("Show Raw Text"):
	                    st.text_area("Raw", res_txt, height=200)
	        else:
	            st.info("Waiting for processor...")