import gc
import types
import sys
import hashlib
import json
import math
import os
import re
from io import BytesIO
from typing import Any, Dict, List, Optional, Tuple

import fitz  # PyMuPDF
import gradio as gr
import requests
import torch
from huggingface_hub import snapshot_download
from PIL import Image, ImageDraw, ImageFont
from qwen_vl_utils import process_vision_info
from transformers import AutoModelForCausalLM, AutoProcessor

from .utils.constants import IMAGE_FACTOR, MAX_PIXELS, MIN_PIXELS
from .utils.prompts import dict_promptmode_to_prompt

APP_TITLE = "PreviewSpace — VLM Playground (Local)"
TMP_DIR = "/tmp/previewspace"
MODELS_DIR = os.path.join(TMP_DIR, "models")
DOTS_REPO_ID = "rednote-hilab/dots.ocr"
DOTS_LOCAL_DIR = os.path.join(MODELS_DIR, "dots.ocr")

LOCAL_DEFAULT_MAX_NEW_TOKENS = 2048

os.makedirs(TMP_DIR, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True)


def round_by_factor(number: int, factor: int) -> int:
    return round(number / factor) * factor


def smart_resize(
    height: int,
    width: int,
    factor: int = IMAGE_FACTOR,
    min_pixels: int = MIN_PIXELS,
    max_pixels: int = MAX_PIXELS,
) -> Tuple[int, int]:
    if max(height, width) / min(height, width) > 200:
        raise ValueError("absolute aspect ratio must be smaller than 200")
    h_bar = max(factor, round_by_factor(height, factor))
    w_bar = max(factor, round_by_factor(width, factor))

    if h_bar * w_bar > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        h_bar = round_by_factor(height / beta, factor)
        w_bar = round_by_factor(width / beta, factor)
    elif h_bar * w_bar < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = round_by_factor(height * beta, factor)
        w_bar = round_by_factor(width * beta, factor)
    return int(h_bar), int(w_bar)


def fetch_image(image_input: Any) -> Image.Image:
    if isinstance(image_input, str):
        if image_input.startswith(("http://", "https://")):
            response = requests.get(image_input, timeout=60)
            image = Image.open(BytesIO(response.content)).convert("RGB")
        else:
            image = Image.open(image_input).convert("RGB")
    elif isinstance(image_input, Image.Image):
        image = image_input.convert("RGB")
    else:
        raise ValueError(f"Invalid image input type: {type(image_input)}")
    return image


def load_images_from_pdf(pdf_path: str) -> List[Image.Image]:
    images: List[Image.Image] = []
    pdf_document = fitz.open(pdf_path)
    try:
        for page_idx in range(len(pdf_document)):
            page = pdf_document.load_page(page_idx)
            pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            img_data = pix.tobytes("ppm")
            image = Image.open(BytesIO(img_data)).convert("RGB")
            images.append(image)
    finally:
        pdf_document.close()
    return images


def file_checksum(path: str, chunk_size: int = 1 << 20) -> str:
    hasher = hashlib.sha256()
    with open(path, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            hasher.update(chunk)
    return hasher.hexdigest()


def draw_layout_on_image(image: Image.Image, layout_data: List[Dict]) -> Image.Image:
    img = image.copy()
    draw = ImageDraw.Draw(img)
    colors = {
        "Caption": "#FF6B6B",
        "Footnote": "#4ECDC4",
        "Formula": "#45B7D1",
        "List-item": "#96CEB4",
        "Page-footer": "#FFEAA7",
        "Page-header": "#DDA0DD",
        "Picture": "#FFD93D",
        "Section-header": "#6C5CE7",
        "Table": "#FD79A8",
        "Text": "#74B9FF",
        "Title": "#E17055",
    }

    try:
        try:
            font = ImageFont.truetype(
                "/System/Library/Fonts/Supplemental/Arial Bold.ttf", 12
            )
        except Exception:
            try:
                font = ImageFont.truetype(
                    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 12
                )
            except Exception:
                font = ImageFont.load_default()

        for item in layout_data:
            bbox = item.get("bbox")
            category = item.get("category")
            if not bbox or not category:
                continue
            color = colors.get(category, "#000000")
            draw.rectangle(bbox, outline=color, width=2)
            label = str(category)
            label_bbox = draw.textbbox((0, 0), label, font=font)
            label_w = label_bbox[2] - label_bbox[0]
            label_h = label_bbox[3] - label_bbox[1]
            x1, y1 = int(bbox[0]), int(bbox[1])
            lx = x1
            ly = max(0, y1 - label_h - 2)
            draw.rectangle([lx, ly, lx + label_w + 4, ly + label_h + 2], fill=color)
            draw.text((lx + 2, ly + 1), label, fill="white", font=font)
    except Exception:
        pass
    return img


def is_arabic_text(text: str) -> bool:
    if not text:
        return False
    header_pattern = r"^#{1,6}\s+(.+)$"
    paragraph_pattern = r"^(?!#{1,6}\s|!\[|```|\||\s*[-*+]\s|\s*\d+\.\s)(.+)$"
    content_lines: List[str] = []
    for line in text.split("\n"):
        s = line.strip()
        if not s:
            continue
        m = re.match(header_pattern, s)
        if m:
            content_lines.append(m.group(1))
            continue
        if re.match(paragraph_pattern, s):
            content_lines.append(s)
    if not content_lines:
        return False
    combined = " ".join(content_lines)
    arabic = 0
    total = 0
    for ch in combined:
        if ch.isalpha():
            total += 1
            if (
                ("\u0600" <= ch <= "\u06ff")
                or ("\u0750" <= ch <= "\u077f")
                or ("\u08a0" <= ch <= "\u08ff")
            ):
                arabic += 1
    if total == 0:
        return False
    return (arabic / total) > 0.5


def extract_json(text: str) -> Optional[Dict[str, Any]]:
    if not text:
        return None
    try:
        return json.loads(text)
    except Exception:
        pass
    brace_start = text.find("{")
    brace_end = text.rfind("}")
    if 0 <= brace_start < brace_end:
        snippet = text[brace_start : brace_end + 1]
        try:
            return json.loads(snippet)
        except Exception:
            pass
    fenced = re.findall(r"```json\s*([\s\S]*?)\s*```", text)
    for block in fenced:
        try:
            return json.loads(block)
        except Exception:
            continue
    return None


model: Optional[AutoModelForCausalLM] = None
processor: Optional[AutoProcessor] = None


def ensure_model_loaded() -> Tuple[AutoModelForCausalLM, AutoProcessor]:
    global model, processor
    if model is not None and processor is not None:
        return model, processor

    os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1")
    snapshot_download(
        repo_id=DOTS_REPO_ID,
        local_dir=DOTS_LOCAL_DIR,
        local_dir_use_symlinks=False,
    )

    # Work around transformers dynamic module parent package issue with repo name containing a dot
    # Ensure 'transformers_modules' and 'transformers_modules.dots' exist as packages
    if "transformers_modules" not in sys.modules:
        pkg = types.ModuleType("transformers_modules")
        pkg.__path__ = []  # type: ignore[attr-defined]
        sys.modules["transformers_modules"] = pkg
    if "transformers_modules.dots" not in sys.modules:
        subpkg = types.ModuleType("transformers_modules.dots")
        subpkg.__path__ = []  # type: ignore[attr-defined]
        sys.modules["transformers_modules.dots"] = subpkg

    use_mps = torch.backends.mps.is_available()
    dtype = (
        torch.float16
        if use_mps
        else (torch.bfloat16 if torch.cuda.is_available() else torch.float32)
    )

    model = AutoModelForCausalLM.from_pretrained(
        DOTS_LOCAL_DIR,
        torch_dtype=dtype,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    if use_mps:
        model.to("mps")

    proc = AutoProcessor.from_pretrained(DOTS_LOCAL_DIR, trust_remote_code=True)
    processor = proc
    return model, processor


def run_inference(
    image: Image.Image,
    prompt_text: str,
    max_new_tokens: int = LOCAL_DEFAULT_MAX_NEW_TOKENS,
) -> str:
    mdl, proc = ensure_model_loaded()
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt_text},
            ],
        }
    ]
    text = proc.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = proc(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    device = (
        "mps"
        if torch.backends.mps.is_available()
        else ("cuda" if torch.cuda.is_available() else "cpu")
    )
    inputs = {k: v.to(device) if hasattr(v, "to") else v for k, v in inputs.items()}
    with torch.no_grad():
        generated_ids = mdl.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
            temperature=0.1,
        )
    trimmed = [
        out_ids[len(in_ids) :]
        for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
    ]
    output_text = processor.batch_decode(
        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return output_text[0] if output_text else ""


def process_single_image(
    image: Image.Image,
    prompt_text: str,
    max_new_tokens: int,
) -> Dict[str, Any]:
    img = fetch_image(image)
    raw = run_inference(img, prompt_text, max_new_tokens=max_new_tokens)
    result: Dict[str, Any] = {
        "original_image": img,
        "processed_image": img,
        "raw_output": raw,
        "layout_result": None,
        "markdown": None,
    }
    data = extract_json(raw)
    if isinstance(data, dict):
        result["layout_result"] = data
        items = data.get("elements", data.get("elements_list", data.get("content", [])))
        if isinstance(items, list):
            result["processed_image"] = draw_layout_on_image(img, items)
            result["markdown"] = layoutjson2md(img, items)
    if result["markdown"] is None:
        result["markdown"] = raw
    return result


def layoutjson2md(
    image: Image.Image, layout_data: List[Dict], text_key: str = "text"
) -> str:
    lines: List[str] = []
    try:
        items = sorted(
            layout_data,
            key=lambda x: (
                x.get("bbox", [0, 0, 0, 0])[1],
                x.get("bbox", [0, 0, 0, 0])[0],
            ),
        )
        for item in items:
            category = item.get("category", "")
            text = item.get(text_key, "")
            if category == "Title" and text:
                lines.append(f"# {text}\n")
            elif category == "Section-header" and text:
                lines.append(f"## {text}\n")
            elif category == "List-item" and text:
                lines.append(f"- {text}\n")
            elif category == "Table" and text:
                if text.strip().startswith("<"):
                    lines.append(text + "\n")
                else:
                    lines.append(f"**Table:** {text}\n")
            elif category == "Formula" and text:
                if text.strip().startswith("$") or "\\" in text:
                    lines.append(f"$$\n{text}\n$$\n")
                else:
                    lines.append(f"**Formula:** {text}\n")
            elif category == "Caption" and text:
                lines.append(f"*{text}*\n")
            elif category in ["Page-header", "Page-footer"]:
                continue
            elif category == "Picture":
                continue
            elif text:
                lines.append(f"{text}\n")
            lines.append("")
    except Exception:
        return json.dumps(layout_data, ensure_ascii=False)
    return "\n".join(lines)


def create_blocks_app():
    css = """
    .main-container { max-width: 1500px; margin: 0 auto; }
    .header-text { text-align: center; color: #1f2937; margin-bottom: 12px; }
    .page-info { text-align: center; padding: 8px 16px; border-radius: 20px; font-weight: 600; }
    .process-button { border: none !important; color: white !important; font-weight: 700 !important; }
    """

    with gr.Blocks(theme=gr.themes.Soft(), css=css, title=APP_TITLE) as demo:
        doc_state = gr.State(
            {
                "images": [],
                "current_page": 0,
                "total_pages": 0,
                "file_type": None,
                "checksum": None,
                "results": [],
                "parsed": False,
            }
        )

        cache_state = gr.State({})

        gr.HTML(
            """
            <div class=\"header-text\"> 
                <h2>VLM Playground — dots.ocr (Local)</h2>
                <p>Optimized defaults for Apple Silicon / CPU dev.</p>
            </div>
            """
        )

        with gr.Row(elem_classes=["main-container"]):
            with gr.Column(scale=4):
                file_input = gr.File(
                    label="Upload PDF or Image",
                    file_types=[
                        ".pdf",
                        ".png",
                        ".jpg",
                        ".jpeg",
                        ".bmp",
                        ".tiff",
                        ".webp",
                    ],
                    type="filepath",
                )

                with gr.Group():
                    template = gr.Dropdown(
                        label="Prompt Template",
                        choices=["Layout Extraction"],
                        value="Layout Extraction",
                    )
                    prompt_text = gr.Textbox(
                        label="Current Prompt",
                        value=dict_promptmode_to_prompt.get("prompt_layout_all_en", ""),
                        lines=6,
                    )

                with gr.Row():
                    parse_button = gr.Button(
                        "Parse", variant="primary", elem_classes=["process-button"]
                    )
                    clear_button = gr.Button("Clear")

                with gr.Accordion("Advanced", open=False):
                    max_new_tokens = gr.Slider(
                        minimum=256,
                        maximum=8192,
                        value=LOCAL_DEFAULT_MAX_NEW_TOKENS,
                        step=128,
                        label="Max new tokens",
                    )
                    page_range = gr.Textbox(
                        label="Page selection",
                        placeholder="e.g., 1-3,5 (blank = current page, 'all' = all pages)",
                    )

            with gr.Column(scale=5):
                preview_image = gr.Image(label="Page Preview", type="pil", height=520)
                with gr.Row():
                    prev_btn = gr.Button("◀ Prev")
                    page_info = gr.HTML('<div class="page-info">No file</div>')
                    next_btn = gr.Button("Next ▶")
                with gr.Row():
                    page_jump = gr.Number(value=1, label="Page #", precision=0)
                    jump_btn = gr.Button("Go")

            with gr.Column(scale=6):
                with gr.Tabs():
                    with gr.Tab("Markdown Render"):
                        md_render = gr.Markdown(
                            value="Upload and parse to view results", height=520
                        )
                    with gr.Tab("Raw Markdown"):
                        md_raw = gr.Textbox(value="", lines=20)
                    with gr.Tab("Current Page JSON"):
                        json_view = gr.JSON(value=None)
                    with gr.Tab("Processed Image"):
                        processed_view = gr.Image(type="pil", height=520)

                with gr.Row():
                    download_jsonl = gr.DownloadButton(label="Download JSONL")
                    download_markdown = gr.DownloadButton(label="Download Markdown")

        def on_template_change(choice: str) -> str:
            return dict_promptmode_to_prompt.get("prompt_layout_all_en", "")

        def on_file_change(path: Optional[str]):
            if not path or not os.path.exists(path):
                return (
                    {
                        "images": [],
                        "current_page": 0,
                        "total_pages": 0,
                        "file_type": None,
                        "checksum": None,
                        "results": [],
                        "parsed": False,
                    },
                    None,
                    '<div class="page-info">No file</div>',
                )
            checksum = file_checksum(path)
            ext = os.path.splitext(path)[1].lower()
            if ext == ".pdf":
                images = load_images_from_pdf(path)
                state = {
                    "images": images,
                    "current_page": 0,
                    "total_pages": len(images),
                    "file_type": "pdf",
                    "checksum": checksum,
                    "results": [None] * len(images),
                    "parsed": False,
                }
                return (
                    state,
                    images[0] if images else None,
                    f'<div class="page-info">Page 1 / {len(images)}</div>',
                )
            else:
                image = Image.open(path).convert("RGB")
                state = {
                    "images": [image],
                    "current_page": 0,
                    "total_pages": 1,
                    "file_type": "image",
                    "checksum": checksum,
                    "results": [None],
                    "parsed": False,
                }
                return state, image, '<div class="page-info">Page 1 / 1</div>'

        def nav_page(state: Dict[str, Any], direction: str):
            if not state.get("images"):
                return (
                    state,
                    None,
                    '<div class="page-info">No file</div>',
                    "No results",
                    "",
                    None,
                    None,
                )
            if direction == "prev":
                state["current_page"] = max(0, state["current_page"] - 1)
            elif direction == "next":
                state["current_page"] = min(
                    state["total_pages"] - 1, state["current_page"] + 1
                )
            idx = state["current_page"]
            img = state["images"][idx]
            info = (
                f'<div class="page-info">Page {idx + 1} / {state["total_pages"]}</div>'
            )
            result = (
                state["results"][idx]
                if state.get("parsed") and idx < len(state["results"])
                else None
            )
            md = result.get("markdown") if result else "Page not processed yet"
            md_out = gr.update(value=md, rtl=True) if is_arabic_text(md) else md
            md_raw_text = md
            proc_img = result.get("processed_image") if result else None
            js = result.get("layout_result") if result else None
            return state, img, info, md_out, md_raw_text, proc_img, js

        def jump_to_page(state: Dict[str, Any], page_num: Any):
            if not state.get("images"):
                return (
                    state,
                    None,
                    '<div class="page-info">No file</div>',
                    "No results",
                    "",
                    None,
                    None,
                )
            try:
                n = int(page_num)
            except Exception:
                n = 1
            n = max(1, min(state["total_pages"], n))
            state["current_page"] = n - 1
            return nav_page(state, direction="stay")

        def parse_pages(
            state: Dict[str, Any],
            prompt: str,
            max_tokens: int,
            selection: Optional[str],
        ):
            if not state.get("images"):
                return state, None, "No file", "No content", "", None, None

            indices: List[int] = []
            if not selection or selection.strip() == "":
                indices = [state["current_page"]]
            elif selection.strip().lower() == "all":
                indices = list(range(state["total_pages"]))
            else:
                parts = [p.strip() for p in selection.split(",") if p.strip()]
                for p in parts:
                    if "-" in p:
                        a, b = p.split("-", 1)
                        try:
                            a_i = max(1, int(a))
                            b_i = min(state["total_pages"], int(b))
                            for i in range(a_i - 1, b_i):
                                indices.append(i)
                        except Exception:
                            continue
                    else:
                        try:
                            i = max(1, min(state["total_pages"], int(p)))
                            indices.append(i - 1)
                        except Exception:
                            continue
                indices = sorted(
                    set([i for i in indices if 0 <= i < state["total_pages"]])
                )

            results = state.get("results") or [None] * state["total_pages"]
            for i in indices:
                img = state["images"][i]
                prompt_hash = hashlib.sha256(prompt.encode("utf-8")).hexdigest()[:16]
                cache_key = (
                    state["checksum"],
                    i,
                    prompt_hash,
                    int(max_tokens),
                )
                cached = cache_state.value.get(cache_key)
                if cached:
                    results[i] = cached
                    continue
                res = process_single_image(
                    img,
                    prompt_text=prompt,
                    max_new_tokens=int(max_tokens),
                )
                results[i] = res
                cache_state.value[cache_key] = res
            state["results"] = results
            state["parsed"] = True

            idx = state["current_page"]
            curr = results[idx]
            md = curr.get("markdown") if curr else "No content"
            md_out = gr.update(value=md, rtl=True) if is_arabic_text(md) else md
            md_raw_text = md
            proc_img = curr.get("processed_image") if curr else None
            js = curr.get("layout_result") if curr else None
            info = (
                f'<div class="page-info">Page {idx + 1} / {state["total_pages"]}</div>'
            )
            prev = state["images"][idx]
            return state, prev, info, md_out, md_raw_text, proc_img, js

        def clear_all():
            gc.collect()
            return (
                {
                    "images": [],
                    "current_page": 0,
                    "total_pages": 0,
                    "file_type": None,
                    "checksum": None,
                    "results": [],
                    "parsed": False,
                },
                None,
                '<div class="page-info">No file</div>',
                "Upload and parse to view results",
                "",
                None,
                None,
            )

        def download_current_jsonl(state: Dict[str, Any]):
            if not state.get("parsed"):
                return gr.DownloadButton.update(value=b"")
            lines: List[str] = []
            for i, res in enumerate(state.get("results", [])):
                if res and res.get("layout_result") is not None:
                    obj = {"page": i + 1, "layout": res["layout_result"]}
                    lines.append(json.dumps(obj, ensure_ascii=False))
            content = "\n".join(lines) if lines else ""
            out_path = os.path.join(TMP_DIR, "results.jsonl")
            with open(out_path, "w", encoding="utf-8") as f:
                f.write(content)
            return gr.DownloadButton.update(value=out_path)

        def download_current_markdown(state: Dict[str, Any]):
            if not state.get("parsed"):
                return gr.DownloadButton.update(value=b"")
            chunks: List[str] = []
            for i, res in enumerate(state.get("results", [])):
                if res and res.get("markdown"):
                    chunks.append(f"## Page {i + 1}\n\n{res['markdown']}")
            content = "\n\n---\n\n".join(chunks) if chunks else ""
            out_path = os.path.join(TMP_DIR, "results.md")
            with open(out_path, "w", encoding="utf-8") as f:
                f.write(content)
            return gr.DownloadButton.update(value=out_path)

        template.change(on_template_change, inputs=[template], outputs=[prompt_text])
        file_input.change(
            on_file_change,
            inputs=[file_input],
            outputs=[doc_state, preview_image, page_info],
        )
        prev_btn.click(
            lambda s: nav_page(s, "prev"),
            inputs=[doc_state],
            outputs=[
                doc_state,
                preview_image,
                page_info,
                md_render,
                md_raw,
                processed_view,
                json_view,
            ],
        )
        next_btn.click(
            lambda s: nav_page(s, "next"),
            inputs=[doc_state],
            outputs=[
                doc_state,
                preview_image,
                page_info,
                md_render,
                md_raw,
                processed_view,
                json_view,
            ],
        )
        jump_btn.click(
            jump_to_page,
            inputs=[doc_state, page_jump],
            outputs=[
                doc_state,
                preview_image,
                page_info,
                md_render,
                md_raw,
                processed_view,
                json_view,
            ],
        )
        parse_button.click(
            parse_pages,
            inputs=[doc_state, prompt_text, max_new_tokens, page_range],
            outputs=[
                doc_state,
                preview_image,
                page_info,
                md_render,
                md_raw,
                processed_view,
                json_view,
            ],
        )
        clear_button.click(
            clear_all,
            outputs=[
                doc_state,
                preview_image,
                page_info,
                md_render,
                md_raw,
                processed_view,
                json_view,
            ],
        )
        download_jsonl.click(
            download_current_jsonl, inputs=[doc_state], outputs=[download_jsonl]
        )
        download_markdown.click(
            download_current_markdown, inputs=[doc_state], outputs=[download_markdown]
        )

        return demo