"""
inference.py — GharScan Qwen2-VL-2B inference pipeline
"""
import re, json, time, torch
from PIL import Image
from loguru import logger
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
from cost_matrix import build_cost_response

# Identifiers handed to ``from_pretrained``: the base checkpoint and the LoRA
# adapter that is merged on top of it.
BASE_MODEL_ID: str = "Qwen/Qwen2-VL-2B-Instruct"
LORA_MODEL_ID: str = "ritvik360/gharscan-qwen2vl-lora"

# Generation settings used for every VLM call.
MAX_NEW_TOKENS: int = 256
TEMPERATURE: float = 0.05

# Populated lazily by _load_model_if_needed(); both stay None until then.
_model = None
_processor = None

def _load_model_if_needed():
    """Load the model and processor into the module globals on first use.

    The LoRA adapter ``LORA_MODEL_ID`` is applied to ``BASE_MODEL_ID`` and
    merged into the weights. If anything in that path fails (including the
    ``peft`` import), the plain base model is loaded instead and a warning
    is logged.

    The globals are only assigned once *both* the model and the processor
    have loaded. A failure while loading the processor therefore leaves the
    module in its "not loaded" state, so the next call retries instead of
    returning early with ``_processor`` still ``None``.

    Raises:
        Exception: whatever ``from_pretrained`` raises when the base model
            or the processor cannot be loaded.
    """
    global _model, _processor
    if _model is not None and _processor is not None:
        return
    logger.info(f"Loading {LORA_MODEL_ID} …")
    t0 = time.monotonic()
    try:
        from peft import PeftModel
        base = Qwen2VLForConditionalGeneration.from_pretrained(
            BASE_MODEL_ID, torch_dtype=torch.bfloat16)
        model = PeftModel.from_pretrained(base, LORA_MODEL_ID).merge_and_unload()
        logger.info("LoRA loaded ✅")
    except Exception as e:
        # Deliberate best-effort: any adapter problem degrades to the base
        # model. The base is reloaded from scratch rather than reusing
        # `base`, which may have been partially modified by the failed
        # adapter load.
        logger.warning(f"LoRA failed ({e}) — using base model")
        model = Qwen2VLForConditionalGeneration.from_pretrained(
            BASE_MODEL_ID, torch_dtype=torch.bfloat16)
    processor = AutoProcessor.from_pretrained(BASE_MODEL_ID)
    model.eval()
    # Publish both globals together so callers never observe a half-loaded state.
    _model, _processor = model, processor
    logger.info(f"Model ready in {time.monotonic()-t0:.1f}s")

def _parse_json_object(raw: str) -> dict:
    """Return the JSON object that starts at the first ``{`` in *raw*, else ``{}``.

    Text before the object (prose, a Markdown code fence) and anything after
    its closing brace is ignored. Top-level JSON values that are not objects
    (lists, strings, numbers) yield ``{}`` so callers can always use ``.get``.
    """
    start = raw.find("{")
    if start == -1:
        return {}
    try:
        # raw_decode parses exactly one JSON value and tolerates trailing text.
        obj, _ = json.JSONDecoder().raw_decode(raw, start)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return {}
    return obj if isinstance(obj, dict) else {}


def _call_vlm(image: "Image.Image", prompt: str) -> dict:
    """Run one image + text prompt through the loaded model and parse its JSON reply.

    Requires ``_load_model_if_needed()`` to have been called, since the
    module-level ``_model`` and ``_processor`` are used directly.

    Args:
        image: The image to place in the user message.
        prompt: The instruction text placed after the image.

    Returns:
        The JSON object found in the generated text, or ``{}`` when the
        reply contains no parseable JSON object. The result is always a dict.
    """
    messages = [{"role": "user", "content": [
        {"type": "image", "image": image},
        {"type": "text",  "text":  prompt}
    ]}]
    text = _processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, _ = process_vision_info(messages)
    inputs = _processor(text=[text], images=image_inputs, return_tensors="pt").to(_model.device)
    with torch.no_grad():
        out = _model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS,
                              temperature=TEMPERATURE, do_sample=TEMPERATURE > 0)
    # The output echoes the prompt tokens; keep only the newly generated tail.
    gen = out[0][inputs["input_ids"].shape[1]:]
    raw = _processor.decode(gen, skip_special_tokens=True).strip()
    return _parse_json_object(raw)

# Step-1 prompt: asks the model for a single JSON object describing the defect.
# Used verbatim (no formatting), so its braces are literal.
_CLASSIFY_PROMPT = (
    "You are GharScan, an expert Indian building inspector.\n"
    "Analyze this image and return ONLY valid JSON:\n"
    '{"defect_type":"<hairline_crack|settlement_crack|structural_crack|water_seepage'
    '|efflorescence|spalling|rebar_rust|plaster_delamination|no_defect>",'
    '"description":"<25-word description>",'
    '"primary_cause":"<1 sentence>",'
    '"monsoon_risk":<true|false>,'
    '"confidence":<0.0-1.0>}'
)

# Step-2 prompt: filled in with str.format(defect_type=...), which is why the
# JSON braces are doubled.
_SEVERITY_PROMPT = (
    "You are GharScan. The defect is: {defect_type}.\n"
    "Return ONLY valid JSON:\n"
    '{{"severity":<1|2|3|4|5>,'
    '"is_structural":<bool>,'
    '"structural_reasoning":"<1 sentence>",'
    '"immediate_action":"<specific action>",'
    '"urgency_timeline":"<next_renovation|within_6_months|within_1_month|this_week|immediately>"}}'
)

# Human-readable label for each defect_type value listed in the classify prompt.
_DEFECT_DISPLAY = {
    "hairline_crack": "Hairline Plaster Crack",
    "settlement_crack": "Settlement Crack",
    "structural_crack": "Structural Crack",
    "water_seepage": "Water Seepage / Damp Patch",
    "efflorescence": "Efflorescence (Salt Deposits)",
    "spalling": "Concrete Spalling",
    "rebar_rust": "Rebar Rust Staining",
    "plaster_delamination": "Plaster Delamination",
    "no_defect": "No Defect Detected",
}

# Defect types that are always reported as structural, whatever the model says.
_STRUCTURAL_ALWAYS = frozenset({"structural_crack", "spalling", "rebar_rust"})

# Defect types that become structural at or above the given severity.
_STRUCTURAL_BY_SEVERITY = {"settlement_crack": 4}

# Advice used when the model does not supply a usable immediate action.
_DEFAULT_ACTIONS = {
    "structural_crack": "Stop using the affected area and call a licensed structural engineer.",
    "spalling": "Keep clear of the area below and arrange a structural inspection.",
    "rebar_rust": "Arrange urgent structural inspection before repairs.",
    "settlement_crack": "Monitor movement and get a structural engineer’s opinion.",
}
_GENERIC_ACTION = "Consult a licensed civil / structural engineer."

# Strings accepted as "true" when the model quotes a boolean.
_TRUE_WORDS = frozenset({"true", "yes", "y", "1"})


def _as_bool(value) -> bool:
    """Interpret a JSON-decoded model value as a boolean.

    Strings are matched against known truthy words so that a quoted
    ``"false"`` is not treated as true; everything else uses ``bool()``.
    """
    if isinstance(value, str):
        return value.strip().lower() in _TRUE_WORDS
    return bool(value)


def _as_text(value) -> str:
    """Return *value* stripped if it is a string, otherwise an empty string."""
    return value.strip() if isinstance(value, str) else ""


def _coerce_severity(raw, default: int = 2) -> int:
    """Convert the model's severity to an int clamped to 1..5, or *default*."""
    try:
        return max(1, min(5, int(float(raw))))
    except (TypeError, ValueError, OverflowError):
        return default


def _normalize_defect_type(raw) -> str:
    """Return a trimmed, lower-cased defect label, or ``"no_defect"`` if unusable."""
    label = raw.strip().lower() if isinstance(raw, str) else ""
    return label or "no_defect"


def _is_structural(defect_type: str, severity: int, model_flag) -> bool:
    """Combine the model's structural flag with the fixed escalation rules."""
    if defect_type in _STRUCTURAL_ALWAYS:
        return True
    threshold = _STRUCTURAL_BY_SEVERITY.get(defect_type)
    if threshold is not None and severity >= threshold:
        return True
    return _as_bool(model_flag)


def run_gharscan_pipeline(image: "Image.Image", language: str = "en", trace_session=None) -> dict:
    """Classify a defect in *image*, rate its severity and build the report.

    The model is loaded on first use. It is moved to the GPU for the
    duration of the call when CUDA is available (otherwise it runs on the
    CPU), and is always moved back to the CPU afterwards.

    Model output is treated as untrusted: missing, ``null`` or wrongly typed
    fields fall back to defaults instead of raising.

    Args:
        image: Input photo; converted to RGB and resized to 448x448.
        language: Accepted for interface compatibility; not read by this
            function.
        trace_session: Optional object with ``log_step(name, inputs, outputs)``
            and ``finalize(report)``; called after each step when truthy.

    Returns:
        The report dict: classification fields, the clamped ``severity``,
        the structural verdict and advice, and the presentation/cost fields
        copied from ``build_cost_response(defect_type, severity)``.
    """
    _load_model_if_needed()
    try:
        # Inside the try so a failed or partial move is still undone below.
        _model.to("cuda" if torch.cuda.is_available() else "cpu")
        image = image.convert("RGB").resize((448, 448))

        # Step 1: Classify
        cls = _call_vlm(image, _CLASSIFY_PROMPT)
        if not isinstance(cls, dict):
            cls = {}
        defect_type = _normalize_defect_type(cls.get("defect_type"))

        if trace_session:
            trace_session.log_step("classify", {}, cls)

        # Step 2: Severity
        sev = _call_vlm(image, _SEVERITY_PROMPT.format(defect_type=defect_type))
        if not isinstance(sev, dict):
            sev = {}
        severity = _coerce_severity(sev.get("severity", 2))

        if trace_session:
            trace_session.log_step("severity", {"defect_type": defect_type}, sev)

        # Step 3: Cost (deterministic)
        cost = build_cost_response(defect_type, severity)

        if trace_session:
            trace_session.log_step("cost", {"defect_type": defect_type, "severity": severity}, cost)

        is_structural = _is_structural(defect_type, severity, sev.get("is_structural", False))

        structural_reasoning = _as_text(sev.get("structural_reasoning"))
        if not structural_reasoning and is_structural:
            structural_reasoning = "This defect type can indicate structural risk."

        immediate_action = _as_text(sev.get("immediate_action")) or _DEFAULT_ACTIONS.get(
            defect_type, _GENERIC_ACTION
        )

        report = {
            "analysis_ok":          defect_type != "no_defect",
            "defect_type":          defect_type,
            "defect_display":       _DEFECT_DISPLAY.get(defect_type, defect_type.replace("_", " ").title()),
            "description":          cls.get("description", ""),
            "primary_cause":        cls.get("primary_cause", ""),
            "monsoon_risk":         _as_bool(cls.get("monsoon_risk", False)),
            "severity":             severity,
            "severity_label":       cost["severity_label"],
            "severity_color":       cost["severity_color"],
            "is_structural":        is_structural,
            "structural_reasoning": structural_reasoning,
            "immediate_action":     immediate_action,
            "urgency_display":      cost["urgency_display"],
            "cost_range_inr":       cost["cost_range_inr"],
            "professional_display": cost["professional_display"],
            "requires_engineer":    cost["requires_engineer"],
            "disclaimer":           cost["disclaimer"],
            "show_liability_banner": cost["show_liability_banner"],
            "liability_text":       cost["liability_text"],
        }
        if trace_session:
            trace_session.finalize(report)
        return report

    finally:
        # Always release the GPU, even when a step above raised.
        _model.to("cpu")
        torch.cuda.empty_cache()