"""Vision LLM OCR for mnemonic schema analysis.

Sends the full schema image to an OpenAI-compatible vision API
and receives structured JSON with all detected elements.

Configure via environment variables:
  VISION_API_URL  — base URL (e.g. http://localhost:8000/v1)
  VISION_API_KEY  — API key (optional)
  VISION_MODEL    — model name (e.g. llava-v1.6-mistral-7b)
"""

from __future__ import annotations

import base64
import json
import logging
import re
import urllib.error
import urllib.request
from typing import Any

import cv2
import numpy as np

# Module-level logger; the request, response and parse-failure messages below go through it.
logger = logging.getLogger(__name__)

# System-role message sent with every request (see analyze_schema_with_vision).
# The text is Russian; in English: "You are an expert in analysing industrial
# mimic diagrams (SCADA/HMI). You are given an image of a mimic diagram. Your
# task is to find and describe ALL elements on the diagram."
_SYSTEM_PROMPT = (
    "Ты — эксперт по анализу промышленных мнемосхем (SCADA/HMI). "
    "Тебе дано изображение мнемосхемы. Твоя задача — найти и описать ВСЕ элементы на схеме."
)

# User-role text part sent alongside the image (see analyze_schema_with_vision).
# The text is Russian. It asks the model to reply with a bare JSON object (no
# markdown fences) of the form {"title": ..., "elements": [...]}, where every
# element carries the keys type, uid, text, x, y, width, height and
# description. It then lists eight element types (circle_uid, widget, text,
# static_equipment, table, button, group_frame, arrow_pipe) with recognition
# rules and states that coordinates are in image pixels.
# _normalize_element reads exactly these element keys.
_USER_PROMPT = """\
Проанализируй это изображение промышленной мнемосхемы и верни JSON со ВСЕМИ элементами.

Верни ТОЛЬКО валидный JSON (без markdown, без ```):

{
  "title": "название схемы из шапки",
  "elements": [
    {
      "type": "один из: widget, circle_uid, text, static_equipment, table, button, group_frame, arrow_pipe",
      "uid": "число — номер параметра из красного кружка или красные цифры в углу ячейки. Пустая строка если нет",
      "text": "текст внутри элемента",
      "x": 0,
      "y": 0,
      "width": 0,
      "height": 0,
      "description": "краткое описание элемента"
    }
  ]
}

Правила определения типов:
1. "circle_uid" — красный кружок с чёрными цифрами внутри (номер параметра)
2. "widget" — прямоугольник с числовым значением (показания датчика), часто с красной рамкой. Красные мелкие цифры в правом верхнем углу = uid
3. "text" — текстовая подпись (название оборудования, единицы измерения, заголовки секций)
4. "static_equipment" — изображение оборудования (насос, задвижка, вентилятор, резервуар, компрессор, циклон)
5. "table" — таблица с несколькими параметрами в строках/столбцах
6. "button" — кнопка интерфейса (Главный экран, Легенда и т.д.)
7. "group_frame" — рамка группы объектов (пунктирная или сплошная рамка вокруг секции)
8. "arrow_pipe" — стрелка или труба (линия потока материала/газа)

Координаты x, y, width, height — в пикселях изображения.
Найди ВСЕ элементы, особенно мелкие красные кружки с номерами и ячейки значений."""


def _encode_image_base64(image_bgr: np.ndarray) -> str:
    """Serialize an image as PNG and return the bytes as base64 ASCII text.

    Args:
        image_bgr: Image array handed to ``cv2.imencode``.

    Returns:
        The PNG file contents, base64-encoded, as a ``str``.

    Raises:
        ValueError: If ``cv2.imencode`` reports that encoding failed.
    """
    ok, png_buffer = cv2.imencode(".png", image_bgr)
    if ok:
        png_bytes = png_buffer.tobytes()
        return base64.b64encode(png_bytes).decode("ascii")
    raise ValueError("Failed to encode image to PNG")


def _extract_json_from_response(text: str) -> dict[str, Any]:
    """Extract the JSON object from an LLM response.

    The response is tried in increasingly lenient ways, so well-formed
    output is never modified:

    1. Unwrap a markdown code fence if present and cut the text down to the
       span from the first ``{`` to the last ``}``.
    2. Parse that span as-is.
    3. Remove trailing commas (``,]`` / ``,}``) outside string literals and
       parse again.
    4. Treat the text as truncated: cut it after a structural ``}``, append
       the closers for everything still open at that point, and parse.
       Cut points are tried from the last one backwards, so as much of the
       response as possible is kept.

    Args:
        text: Raw message content returned by the model.

    Returns:
        The parsed JSON object.

    Raises:
        json.JSONDecodeError: If the text contains no ``{`` or none of the
            repair attempts yields valid JSON.
    """
    cleaned = text.strip()

    fence_match = re.search(r"```(?:json)?\s*\n?(.*?)```", cleaned, re.DOTALL)
    if fence_match:
        cleaned = fence_match.group(1).strip()

    brace_start = cleaned.find("{")
    if brace_start < 0:
        raise json.JSONDecodeError("No JSON object found", cleaned, 0)

    brace_end = cleaned.rfind("}")
    if brace_end > brace_start:
        cleaned = cleaned[brace_start:brace_end + 1]
    else:
        # No closing brace after the opening one: output was cut very early.
        cleaned = cleaned[brace_start:]

    # Strict parse first: valid JSON must come back untouched, even when a
    # string value happens to contain ",]" or unbalanced braces.
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    cleaned = _strip_trailing_commas(cleaned)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Truncated output: keep the longest prefix that can be closed cleanly.
    for cut, closers in reversed(_truncation_candidates(cleaned)):
        try:
            return json.loads(cleaned[:cut] + closers)
        except json.JSONDecodeError:
            continue

    raise json.JSONDecodeError("Could not parse truncated JSON", cleaned[:200], 0)


def _strip_trailing_commas(raw: str) -> str:
    """Remove commas that directly precede ``}`` or ``]`` outside strings."""
    # The first alternative consumes complete string literals so that the
    # comma pattern can never match inside one; those are put back verbatim.
    return re.sub(
        r'("(?:\\.|[^"\\])*")|,\s*([}\]])',
        lambda m: m.group(1) if m.group(1) is not None else m.group(2),
        raw,
        flags=re.DOTALL,
    )


def _truncation_candidates(raw: str) -> list[tuple[int, str]]:
    """List the places where truncated JSON text can be cut and closed.

    Returns one ``(cut, closers)`` pair, in text order, for every ``}`` that
    closes an object outside a string literal: ``raw[:cut]`` ends right
    after that brace and ``closers`` closes the containers still open
    there, innermost first.
    """
    candidates: list[tuple[int, str]] = []
    pending: list[str] = []  # closers still owed, innermost last
    in_string = False
    escaped = False
    for idx, ch in enumerate(raw):
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            pending.append("}")
        elif ch == "[":
            pending.append("]")
        elif ch in "}]":
            if not pending or pending[-1] != ch:
                # Mismatched closer: no longer prefix can be valid JSON.
                break
            pending.pop()
            if ch == "}":
                candidates.append((idx + 1, "".join(reversed(pending))))
            if not pending:
                # Top-level object is complete; anything after is extra data.
                break
    return candidates


def analyze_schema_with_vision(
    image_bgr: np.ndarray,
    *,
    api_url: str,
    api_key: str = "",
    model: str = "",
    max_tokens: int = 16384,
    timeout_seconds: float = 300.0,
) -> dict[str, Any]:
    """Send schema image to vision LLM and get structured element list.

    The image is PNG-encoded, embedded as a base64 data URL and posted to
    ``<api_url>/chat/completions`` together with the module's system and
    user prompts. The model's reply is parsed as JSON and every element is
    clamped to the image bounds.

    Args:
        image_bgr: BGR numpy image of the schema.
        api_url: OpenAI-compatible API base URL (e.g. http://localhost:8000/v1).
        api_key: API key (empty string if not required); sent as a Bearer
            token when non-empty.
        model: Model name.
        max_tokens: Maximum response tokens.
        timeout_seconds: Request timeout.

    Returns:
        Dict with ``title`` (str), ``elements`` (list of normalized element
        dicts), ``imageWidth`` and ``imageHeight`` (pixel size of
        ``image_bgr``).

    Raises:
        ValueError: If the image cannot be encoded to PNG.
        RuntimeError: If the request fails, the API answers with an HTTP
            error, or the response is not a usable chat completion
            (invalid JSON, no choices, empty content, unparseable content).
    """
    b64_image = _encode_image_base64(image_bgr)
    img_h, img_w = image_bgr.shape[:2]

    endpoint = api_url.rstrip("/") + "/chat/completions"

    payload = {
        "model": model,
        "messages": [
            {
                "role": "system",
                "content": _SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": _USER_PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{b64_image}",
                        },
                    },
                ],
            },
        ],
        "max_tokens": max_tokens,
        "temperature": 0,
    }

    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    body = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(
        endpoint, data=body, headers=headers, method="POST",
    )

    logger.info(
        "Vision OCR: sending %dx%d image to %s (model=%s)",
        img_w, img_h, endpoint, model,
    )

    try:
        with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
            raw = response.read()
    except urllib.error.HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="ignore")[:500]
        raise RuntimeError(f"Vision API returned {exc.code}: {detail}") from exc
    except urllib.error.URLError as exc:
        raise RuntimeError(f"Vision API unavailable: {exc.reason}") from exc
    except Exception as exc:
        # Boundary catch-all (e.g. a timeout while reading the body) so that
        # callers only ever have to handle RuntimeError for transport failures.
        raise RuntimeError(f"Vision API call failed: {exc}") from exc

    try:
        api_result = json.loads(raw.decode("utf-8"))
    except ValueError as exc:
        # Both UnicodeDecodeError and json.JSONDecodeError derive from ValueError.
        raise RuntimeError("Vision API returned invalid JSON response") from exc

    message_content = _completion_text(api_result)

    try:
        result = _extract_json_from_response(message_content)
    except json.JSONDecodeError as exc:
        logger.warning("Failed to parse vision response as JSON: %s", exc)
        logger.debug("Raw response: %s", message_content[:1000])
        raise RuntimeError(
            f"Vision API returned non-JSON response: {message_content[:200]}"
        ) from exc

    # The model may return anything under "elements"; only a list of dicts
    # is usable, everything else is dropped.
    raw_elements = result.get("elements")
    if isinstance(raw_elements, list):
        elements = [elem for elem in raw_elements if isinstance(elem, dict)]
    else:
        elements = []
    title = str(result.get("title") or "").strip()

    logger.info(
        "Vision OCR: received %d elements, title='%s'",
        len(elements), title[:50],
    )

    return {
        "title": title,
        "elements": [
            _normalize_element(elem, img_w, img_h) for elem in elements
        ],
        "imageWidth": img_w,
        "imageHeight": img_h,
    }


def _completion_text(api_result: Any) -> str:
    """Return the first choice's message text from a chat-completions payload.

    Raises:
        RuntimeError: If the payload is not an object, has no choices, or
            the first choice carries no text.
    """
    if not isinstance(api_result, dict):
        raise RuntimeError("Vision API returned an unexpected response payload")

    choices = api_result.get("choices")
    if not isinstance(choices, list) or not choices:
        raise RuntimeError("Vision API returned no choices")

    first = choices[0]
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None

    if isinstance(first, dict) and first.get("finish_reason") == "length":
        # The JSON will be repaired by dropping the incomplete tail, so make
        # the data loss visible in the logs.
        logger.warning(
            "Vision API response hit the token limit; trailing elements may be missing"
        )

    if content is None:
        # A null content must not be stringified to "None".
        text = ""
    elif isinstance(content, list):
        # Best effort for servers that answer with a list of typed parts
        # instead of a plain string: keep only the textual parts.
        text = "".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    else:
        text = str(content)

    text = text.strip()
    if not text:
        raise RuntimeError("Vision API returned empty content")
    return text


def _normalize_element(
    elem: dict[str, Any],
    img_w: int,
    img_h: int,
) -> dict[str, Any]:
    """Normalize one model-reported element and clamp its box to the image.

    Coordinates come from LLM output and are not trusted: values that are
    missing, zero, non-numeric or non-finite fall back to a default (0 for
    ``x``/``y``, 24 for ``width``/``height``) instead of raising, so a single
    malformed element cannot abort the whole analysis.

    Args:
        elem: Raw element dict as returned by the model.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        Dict with string fields ``type`` (defaults to ``"text"``), ``uid``,
        ``text``, ``description`` and integer fields ``x``, ``y``, ``width``,
        ``height``. The origin lies inside the image and, for a non-empty
        image, the box does not extend past its right or bottom edge.
    """
    # The origin must be a valid pixel index (at most size - 1); otherwise the
    # forced minimum size of 1 would push the box outside the image.
    x = min(max(0, img_w - 1), max(0, _coerce_int(elem.get("x"), 0)))
    y = min(max(0, img_h - 1), max(0, _coerce_int(elem.get("y"), 0)))
    w = max(1, min(img_w - x, _coerce_int(elem.get("width"), 24)))
    h = max(1, min(img_h - y, _coerce_int(elem.get("height"), 24)))

    return {
        "type": str(elem.get("type") or "text").strip(),
        "uid": str(elem.get("uid") or "").strip(),
        "text": str(elem.get("text") or "").strip(),
        "x": x,
        "y": y,
        "width": w,
        "height": h,
        "description": str(elem.get("description") or "").strip(),
    }


def _coerce_int(value: Any, default: int) -> int:
    """Convert a loosely typed number to int, or return *default* if unusable."""
    try:
        # Falsy values (None, 0, "", []) take the default, as numeric strings
        # such as "10.7" are accepted via float().
        return int(float(value or default))
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text, containers, NaN and +/-Infinity all end up here.
        return default