import streamlit as st
import os
import requests
import json
import base64
import uuid
import re
from pdf2image import convert_from_bytes
from io import BytesIO

# Configure the browser tab title, icon and the wide page layout.
st.set_page_config(
    page_title="DotsOCR",
    page_icon="🔍",
    layout="wide",
)

# === Session state initialisation ===
# Seed every key the script reads later so a fresh session never fails on a
# missing entry. Keys that already exist keep their value across reruns.
# Factories (rather than literal values) give each session its own list/dict.
for _state_key, _make_default in (
    ("total_pages", int),
    ("pdf_images", list),
    ("ocr_results", dict),
    ("current_file_hash", lambda: None),
    ("task_id", lambda: None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

# Configuration read from environment variables.
# PROXY_URL is used as a prefix in f"{PROXY_BASE_URL}/<endpoint>", so
# surrounding whitespace and trailing slashes are removed here; otherwise a
# value such as "https://host/" would produce "https://host//upload_pdf".
# An unset variable yields "" (the upload helper treats that as "no proxy").
PROXY_BASE_URL = os.getenv("PROXY_URL", "").strip().rstrip("/")
# Model identifier placed in the "model" field of the chat-completions payload.
MODEL_NAME = os.getenv("MODEL_NAME", "/model/DotsOCR")


def upload_pdf_to_proxy(task_id, file_bytes, filename):
    """Send the original PDF to the proxy's ``/upload_pdf`` endpoint.

    The upload is best-effort: nothing is raised to the caller. Failures are
    logged instead of being silently discarded so they can be diagnosed.

    Args:
        task_id: Identifier sent as the ``task_id`` form field.
        file_bytes: Raw PDF content.
        filename: File name reported in the multipart upload.

    Returns:
        None. The call is a no-op when ``PROXY_BASE_URL`` is empty.
    """
    import logging  # local import keeps this helper self-contained

    if not PROXY_BASE_URL:
        return

    log = logging.getLogger(__name__)
    url = f"{PROXY_BASE_URL}/upload_pdf"
    files = {'file': (filename, file_bytes, 'application/pdf')}
    data = {'task_id': task_id}
    # Header that skips the ngrok browser-warning interstitial.
    headers = {"ngrok-skip-browser-warning": "true"}

    try:
        response = requests.post(
            url, files=files, data=data, headers=headers, timeout=60)
    except Exception:
        # Deliberately broad: the upload must never break the UI. Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        log.warning("Upload of %r to proxy failed", filename, exc_info=True)
        return

    if not response.ok:
        log.warning(
            "Proxy rejected upload of %r with HTTP %s",
            filename, response.status_code)


def image_to_base64(image):
    """Encode an image as a base64 string of its JPEG representation.

    Args:
        image: Object with a PIL-style ``save(fp, format=...)`` method. If it
            also exposes ``mode`` and that mode cannot be written as JPEG
            (for example ``RGBA``, ``LA`` or ``P``), it is converted to
            ``RGB`` via ``image.convert("RGB")`` before encoding.

    Returns:
        The base64-encoded JPEG bytes as a ``str``.
    """
    # Modes the JPEG writer accepts as-is; they are left untouched so that
    # previously working inputs produce exactly the same output.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    mode = getattr(image, "mode", None)
    if mode is not None and mode not in jpeg_writable_modes:
        # JPEG has no alpha channel or palette; saving such an image raises.
        image = image.convert("RGB")

    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue()).decode('utf-8')


def call_proxy_api(task_id, page_num, image):
    """Run OCR on one page image through the proxy's chat-completions API.

    Args:
        task_id: Value sent in the ``X-Task-ID`` request header.
        page_num: Page number sent (as text) in the ``X-Page-Num`` header.
        image: Page image; encoded with ``image_to_base64`` and embedded as a
            ``data:image/jpeg;base64,...`` URL.

    Returns:
        ``(True, text)`` on success, where ``text`` is always a ``str``
        (empty when the response carries no content), or
        ``(False, message)`` on failure. Failure messages start with
        ``"Proxy Error"`` (bad status or unusable response body) or
        ``"Connection Error"`` (the request itself failed).
    """
    b64_img = image_to_base64(image)
    prompt = "Parse the content of this image into markdown text."

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/jpeg;base64,{b64_img}"}}
                ]
            }
        ],
        "max_tokens": 4096,
        "temperature": 0.1
    }

    headers = {
        "X-Task-ID": task_id,
        "X-Page-Num": str(page_num),
        "ngrok-skip-browser-warning": "true"
    }

    # Transport failures only: anything raised while sending the request.
    try:
        url = f"{PROXY_BASE_URL}/v1/chat/completions"
        response = requests.post(
            url, json=payload, headers=headers, timeout=120)
    except Exception as e:
        return False, f"Connection Error: {str(e)}"

    if response.status_code != 200:
        return False, f"Proxy Error {response.status_code}"

    # Parse defensively: a key that is missing, null or empty falls back to
    # the same defaults, so a null "content" or an empty "choices" list can
    # no longer surface as ``None`` or as a misleading connection error.
    try:
        res_json = response.json()
        choices = res_json.get('choices') or [{}]
        message = choices[0].get('message') or {}
        content = message.get('content') or ""
    except (ValueError, AttributeError, TypeError, KeyError, IndexError) as e:
        return False, f"Proxy Error: malformed response ({e})"

    # Callers treat the result as text; coerce any non-string content.
    return True, content if isinstance(content, str) else str(content)


def download_pdf_from_url(url):
    """Download a document from ``url`` and return its raw bytes.

    Google Drive share links of the form ``.../file/d/<id>/...`` are
    rewritten to the direct-download endpoint before the request is made.

    Args:
        url: Address of the file; surrounding whitespace is ignored.

    Returns:
        ``(content, None)`` on success or ``(None, error_message)`` on any
        failure. This function does not raise.
    """
    try:
        url = url.strip()  # pasted URLs often carry stray whitespace
        if "drive.google.com" in url and "/file/d/" in url:
            match = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
            if match:
                url = f"https://drive.google.com/uc?export=download&id={match.group(1)}"
        headers = {"User-Agent": "Mozilla/5.0"}
        # No stream=True: the whole body is needed right away, and a streamed
        # response would keep its connection checked out if
        # raise_for_status() raised before the body was consumed.
        resp = requests.get(url, headers=headers, timeout=30)
        resp.raise_for_status()
        return resp.content, None
    except Exception as e:
        # Broad on purpose: every failure is reported through the return
        # value so the UI can display it.
        return None, str(e)


# === UI starts here ===
st.title("🔍 DotsOCR Service")

# Sidebar: choose the PDF source. The outcome is `pdf_bytes` (None when no
# document is available) and `file_name`, both read by the code that follows.
with st.sidebar:
    st.header("Input Source")
    input_type = st.radio("Select:", ["Upload PDF", "PDF URL"])
    pdf_bytes = None
    file_name = "document.pdf"

    if input_type == "Upload PDF":
        uploaded_file = st.file_uploader("Choose file", type=["pdf"])
        if uploaded_file is not None:
            # getvalue() returns the full buffer regardless of the current
            # read position, so repeated script runs always see the file.
            pdf_bytes = uploaded_file.getvalue()
            file_name = uploaded_file.name
    else:
        url = st.text_input("URL:")
        if url and st.button("Fetch"):
            with st.spinner("Downloading..."):
                data, err = download_pdf_from_url(url)
            if data:
                st.session_state["fetched_pdf_bytes"] = data
            else:
                st.session_state["fetched_pdf_bytes"] = None
                st.error(f"Download failed: {err or 'empty response'}")
        # st.button is True only on the run triggered by the click. Keeping
        # the download in session state lets the document survive later
        # reruns (page navigation, the automatic rerun after OCR, retries).
        pdf_bytes = st.session_state.get("fetched_pdf_bytes")

if pdf_bytes:
    # Pages whose OCR call failed are tracked separately from the result
    # text, so recognised text that merely contains the words "Proxy Error"
    # is never mistaken for a failure.
    if "ocr_failed_pages" not in st.session_state:
        st.session_state.ocr_failed_pages = set()

    # Detect a new document by comparing content hashes.
    file_hash = hash(pdf_bytes)
    if st.session_state.current_file_hash != file_hash:
        # Drop everything belonging to the previous document first, so a
        # failed conversion can never leave its pages on screen.
        st.session_state.ocr_results = {}
        st.session_state.ocr_failed_pages = set()
        st.session_state.pdf_images = []
        st.session_state.total_pages = 0
        st.session_state.current_file_hash = file_hash
        st.session_state.task_id = str(uuid.uuid4())

        # 1. Upload the original file to the proxy (best-effort).
        upload_pdf_to_proxy(st.session_state.task_id, pdf_bytes, file_name)

        # 2. Render the PDF pages to images.
        with st.spinner("Initializing Document..."):
            try:
                images = convert_from_bytes(pdf_bytes, dpi=150)
            except Exception as exc:
                # Do not remember a document that could not be loaded; the
                # next run will attempt the conversion again.
                st.session_state.current_file_hash = None
                st.error(f"Failed to process PDF. {exc}")
                st.stop()
            st.session_state.pdf_images = images
            st.session_state.total_pages = len(images)

    st.divider()

    failed_pages = st.session_state.ocr_failed_pages
    total_pages = st.session_state.total_pages

    if total_pages > 0:
        # Page navigation. A slider whose min and max are identical is
        # rejected by Streamlit (and would be pointless), so single-page
        # documents skip it.
        if total_pages > 1:
            page_idx = st.slider("Page Navigation", 1, total_pages, 1) - 1
        else:
            page_idx = 0
        current_image = st.session_state.pdf_images[page_idx]

        col1, col2 = st.columns([1, 1])

        with col1:
            st.image(current_image,
                     caption=f"Page {page_idx + 1}", use_container_width=True)

            # Auto-run: a page without a stored result is processed
            # immediately, with no button press required.
            if page_idx not in st.session_state.ocr_results:
                with st.spinner(f"⚡ Auto-processing Page {page_idx + 1}..."):
                    success, res = call_proxy_api(
                        st.session_state.task_id, page_idx + 1, current_image)

                # Store the outcome: recognised text on success, the error
                # message on failure. Always keep a string for the display.
                st.session_state.ocr_results[page_idx] = (
                    "" if res is None else str(res))
                if success:
                    failed_pages.discard(page_idx)
                else:
                    failed_pages.add(page_idx)

                # Rerun so the result column is rendered with the new data.
                st.rerun()

        with col2:
            # Result panel.
            if page_idx in st.session_state.ocr_results:
                res_txt = st.session_state.ocr_results[page_idx]

                if page_idx in failed_pages:
                    st.error(f"❌ {res_txt}")
                    # Retry: forget the stored failure and run the page again.
                    if st.button("🔄 Retry Page"):
                        del st.session_state.ocr_results[page_idx]
                        failed_pages.discard(page_idx)
                        st.rerun()
                else:
                    st.success("✅ Analysis Complete")
                    st.markdown(res_txt)
                    with st.expander("Show Raw Text"):
                        st.text_area("Raw", res_txt, height=200)
            else:
                st.info("Waiting for processor...")