"""Central configuration for DocuMaker.

Every tunable is read from environment variables (optionally a local ``.env``
file), so model ids / devices can be swapped without touching code.
"""
from __future__ import annotations

import functools
import os
from pathlib import Path

from dotenv import load_dotenv

# Project root = parent of the ``src`` package directory, i.e. two levels up
# from this file's resolved (absolute) path.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Load .env from the project root if present (silently ignored if missing).
# This runs before every ``os.getenv`` lookup below so values from the file are
# visible to them.
# NOTE(review): python-dotenv defaults to ``override=False``, so variables that
# are already set in the real environment should take precedence — confirm.
load_dotenv(PROJECT_ROOT / ".env")


def _flag(name: str, default: str = "0") -> bool:
    """Interpret environment variable *name* as a boolean switch.

    The value (or *default* when the variable is unset) is trimmed and
    lower-cased; ``"0"``, ``"false"``, ``"no"`` and the empty string mean
    ``False``, anything else means ``True``.
    """
    falsy_spellings = {"0", "false", "no", ""}
    raw = os.getenv(name, default)
    normalized = raw.strip().lower()
    return normalized not in falsy_spellings


# --- Paths -------------------------------------------------------------------
# Root directory for all per-session working files; created eagerly at import.
# A blank ``DOCUMAKER_WORK_DIR`` (e.g. an empty entry in ``.env``) counts as
# unset: ``Path("")`` would otherwise resolve to the current working directory
# and session data would be scattered wherever the app happened to be launched.
WORK_DIR = Path(
    os.getenv("DOCUMAKER_WORK_DIR", "").strip() or PROJECT_ROOT / "work"
).resolve()
WORK_DIR.mkdir(parents=True, exist_ok=True)

# --- HuggingFace credentials -------------------------------------------------
def _token_candidates() -> list[str]:
    """Collect the distinct, non-blank HF tokens found in the environment.

    Variables are consulted in priority order (``DOCUMAKER_HF_TOKEN``,
    ``HF_TOKEN``, ``HUGGINGFACEHUB_API_TOKEN``). Each value is stripped of
    surrounding whitespace; blanks are dropped and duplicates keep only their
    first (highest-priority) occurrence.
    """
    env_names = ("DOCUMAKER_HF_TOKEN", "HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN")
    cleaned = [(os.getenv(env_name) or "").strip() for env_name in env_names]
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(tok for tok in cleaned if tok))


# Whether a UI token may be mirrored into the process environment. True for the
# default local single-user app; turned off automatically for shared/multi-user
# launches so one user's token can't leak to another via the global environment.
# Read by ``apply_token``; updated through ``set_allow_env_token``.
_ALLOW_ENV_TOKEN: bool = True


def set_allow_env_token(allowed: bool) -> None:
    """Enable or disable mirroring of UI tokens into the process environment.

    Stores the truthiness of *allowed* in the module-level ``_ALLOW_ENV_TOKEN``
    switch.
    """
    global _ALLOW_ENV_TOKEN
    mirror_enabled = bool(allowed)
    _ALLOW_ENV_TOKEN = mirror_enabled


def apply_token(token: str | None) -> str | None:
    """Return the cleaned UI token and, **in single-user mode only**, mirror it
    into the process ``HF_TOKEN``/``HUGGINGFACEHUB_API_TOKEN`` so huggingface_hub
    (InferenceClient auto-discovery, model downloads) also uses it.

    In multi-user/shared mode the environment is left untouched — the token is
    still threaded explicitly to the LLM and captioner, so it stays scoped to the
    caller's session and nothing leaks across users. Returns None if empty.
    """
    token = (token or "").strip()
    if token and _ALLOW_ENV_TOKEN:
        os.environ["HF_TOKEN"] = token
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = token
    return token or None


@functools.lru_cache(maxsize=1)
def resolve_hf_token() -> str | None:
    """Pick the HF token to use from the environment, preferring one that works.

    Environments often carry a stale ``HF_TOKEN`` next to a working
    ``HUGGINGFACEHUB_API_TOKEN`` (or the reverse). Each candidate is checked
    with ``huggingface_hub.whoami`` in priority order and the first one that
    authenticates wins. If huggingface_hub cannot be imported, or no candidate
    authenticates (e.g. offline), the first candidate is used as-is.

    The winner is written back to both ``HF_TOKEN`` and
    ``HUGGINGFACEHUB_API_TOKEN`` so huggingface_hub's auto-discovery (model
    downloads, etc.) agrees with it. The result is memoised, so validation
    happens once per process unless the cache is cleared.

    Returns:
        The selected token, or ``None`` when no candidate is configured.
    """
    tokens = _token_candidates()
    if not tokens:
        return None

    try:
        from huggingface_hub import whoami
    except Exception:
        # Hub import issue — skip validation and fall back to the first candidate.
        whoami = None

    winner = tokens[0]
    if whoami is not None:
        for candidate in tokens:
            try:
                whoami(token=candidate)
            except Exception:
                # Rejected token or offline: try the next candidate.
                continue
            winner = candidate
            break

    for env_name in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
        os.environ[env_name] = winner
    return winner

# --- Text LLM (HF Inference API) --------------------------------------------
# A blank value (e.g. ``DOCUMAKER_LLM_MAX_TOKENS=`` left empty in ``.env``) is
# treated as unset and falls back to the default, instead of yielding an empty
# model id or aborting the import with ``ValueError``. Non-blank values that
# fail to parse as numbers still raise, so typos are reported at startup.
LLM_MODEL = os.getenv("DOCUMAKER_LLM_MODEL", "").strip() or "Qwen/Qwen2.5-7B-Instruct"
# None => no provider explicitly configured.
LLM_PROVIDER = os.getenv("DOCUMAKER_LLM_PROVIDER", "").strip() or None
LLM_MAX_TOKENS = int(os.getenv("DOCUMAKER_LLM_MAX_TOKENS", "").strip() or "4096")
LLM_TEMPERATURE = float(os.getenv("DOCUMAKER_LLM_TEMPERATURE", "").strip() or "0.3")
# Approx. characters of transcript per LLM chunk (keeps prompts within context).
LLM_CHUNK_CHARS = int(os.getenv("DOCUMAKER_LLM_CHUNK_CHARS", "").strip() or "6000")

# --- Vision LLM (HF Inference API) + local fallback -------------------------
# Vision is on by default; set DOCUMAKER_ENABLE_VISION to 0/false/no (or leave
# it blank) to turn it off.
ENABLE_VISION = _flag("DOCUMAKER_ENABLE_VISION", "1")
# Blank model ids (e.g. an empty entry in ``.env``) count as unset and fall back
# to the defaults rather than becoming an empty, unusable model id.
VLM_MODEL = os.getenv("DOCUMAKER_VLM_MODEL", "").strip() or "Qwen/Qwen2-VL-7B-Instruct"
# None => no provider explicitly configured.
VLM_PROVIDER = os.getenv("DOCUMAKER_VLM_PROVIDER", "").strip() or None
LOCAL_CAPTION_MODEL = (
    os.getenv("DOCUMAKER_LOCAL_CAPTION_MODEL", "").strip()
    or "Salesforce/blip-image-captioning-base"
)

# --- Whisper (local faster-whisper) -----------------------------------------
# Blank model/device values (e.g. empty entries in ``.env``) count as unset and
# fall back to "small" / "auto" instead of becoming empty strings.
WHISPER_MODEL = os.getenv("DOCUMAKER_WHISPER_MODEL", "").strip() or "small"
WHISPER_DEVICE = os.getenv("DOCUMAKER_WHISPER_DEVICE", "").strip().lower() or "auto"
# Blank => choose automatically per device (int8_float16 on CUDA, int8 on CPU).
WHISPER_COMPUTE_TYPE = os.getenv("DOCUMAKER_WHISPER_COMPUTE_TYPE", "").strip()

# --- Frame extraction --------------------------------------------------------
# Blank values (e.g. empty entries in ``.env``) count as unset and fall back to
# the defaults instead of aborting the import with ``ValueError``. Non-blank
# values that are not valid numbers still raise.
SCENE_THRESHOLD = float(os.getenv("DOCUMAKER_SCENE_THRESHOLD", "").strip() or "27.0")
SCENE_MIN_LEN_SEC = float(os.getenv("DOCUMAKER_SCENE_MIN_LEN_SEC", "").strip() or "1.0")
DEDUP_HASH_DISTANCE = int(os.getenv("DOCUMAKER_DEDUP_HASH_DISTANCE", "").strip() or "6")

# --- DOCX --------------------------------------------------------------------
# Image width in inches. A blank value counts as unset and falls back to 5.5
# rather than aborting the import with ``ValueError``.
DOCX_IMAGE_WIDTH_INCHES = float(
    os.getenv("DOCUMAKER_DOCX_IMAGE_WIDTH_INCHES", "").strip() or "5.5"
)

# --- External binaries -------------------------------------------------------
# Executable names or paths. A blank value counts as unset and falls back to the
# bare command name (looked up on PATH) instead of an empty executable name.
FFMPEG_BIN = os.getenv("DOCUMAKER_FFMPEG_BIN", "").strip() or "ffmpeg"
FFPROBE_BIN = os.getenv("DOCUMAKER_FFPROBE_BIN", "").strip() or "ffprobe"


def session_dir(session_id: str) -> Path:
    """Return (creating if needed) the working directory for one session.

    The directory is ``WORK_DIR / session_id``. Its ``frames`` subdirectory is
    created as well, so both exist when this function returns.

    Args:
        session_id: Name of the session's subdirectory under ``WORK_DIR``.

    Returns:
        The session directory, ``WORK_DIR / session_id``.

    Raises:
        ValueError: If ``session_id`` would place the directory outside
            ``WORK_DIR`` (an absolute path, or ``..`` segments that climb out).
    """
    d = WORK_DIR / session_id
    # Joining an absolute ``session_id`` discards WORK_DIR entirely, and ``..``
    # segments can climb out of it. Collapse the path lexically (without
    # following symlinks) and refuse anything that is not WORK_DIR or below it,
    # so a crafted id cannot create directories elsewhere on disk.
    normalized = Path(os.path.normpath(d))
    if normalized != WORK_DIR and WORK_DIR not in normalized.parents:
        raise ValueError(f"session_id escapes the work directory: {session_id!r}")
    (d / "frames").mkdir(parents=True, exist_ok=True)
    return d