""" inference.py — GharScan Qwen2-VL-2B inference pipeline """ import re, json, time, torch from PIL import Image from loguru import logger from transformers import Qwen2VLForConditionalGeneration, AutoProcessor from qwen_vl_utils import process_vision_info from cost_matrix import build_cost_response BASE_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct" LORA_MODEL_ID = "ritvik360/gharscan-qwen2vl-lora" MAX_NEW_TOKENS = 256 TEMPERATURE = 0.05 _model = None _processor = None def _load_model_if_needed(): global _model, _processor if _model is not None: return logger.info(f"Loading {LORA_MODEL_ID} …") t0 = time.monotonic() try: from peft import PeftModel base = Qwen2VLForConditionalGeneration.from_pretrained( BASE_MODEL_ID, torch_dtype=torch.bfloat16) _model = PeftModel.from_pretrained(base, LORA_MODEL_ID) _model = _model.merge_and_unload() logger.info("LoRA loaded ✅") except Exception as e: logger.warning(f"LoRA failed ({e}) — using base model") _model = Qwen2VLForConditionalGeneration.from_pretrained( BASE_MODEL_ID, torch_dtype=torch.bfloat16) _processor = AutoProcessor.from_pretrained(BASE_MODEL_ID) _model.eval() logger.info(f"Model ready in {time.monotonic()-t0:.1f}s") def _call_vlm(image: Image.Image, prompt: str) -> dict: messages = [{"role": "user", "content": [ {"type": "image", "image": image}, {"type": "text", "text": prompt} ]}] text = _processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) image_inputs, _ = process_vision_info(messages) inputs = _processor(text=[text], images=image_inputs, return_tensors="pt").to(_model.device) with torch.no_grad(): out = _model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS, temperature=TEMPERATURE, do_sample=TEMPERATURE > 0) gen = out[0][inputs["input_ids"].shape[1]:] raw = _processor.decode(gen, skip_special_tokens=True).strip() try: return json.loads(raw) except Exception: m = re.search(r'\{.*\}', raw, re.DOTALL) try: return json.loads(m.group()) if m else {} except Exception: return {} 
# NOTE(review): some value placeholders in these prompts look empty (e.g. the
# value after "monsoon_risk":) — confirm against the prompt used for
# fine-tuning before changing any text here.
_CLASSIFY_PROMPT = """You are GharScan, an expert Indian building inspector. Analyze this image and return ONLY valid JSON: {"defect_type":"","description":"<25-word description>","primary_cause":"<1 sentence>","monsoon_risk":,"confidence":<0.0-1.0>}"""

# Formatted with str.format(defect_type=...); literal braces are doubled.
_SEVERITY_PROMPT = """You are GharScan. The defect is: {defect_type}. Return ONLY valid JSON: {{"severity":<1|2|3|4|5>,"is_structural":,"structural_reasoning":"<1 sentence>","immediate_action":"","urgency_timeline":""}}"""

# Human-readable label for each known defect key.
_DEFECT_DISPLAY = {
    "hairline_crack": "Hairline Plaster Crack",
    "settlement_crack": "Settlement Crack",
    "structural_crack": "Structural Crack",
    "water_seepage": "Water Seepage / Damp Patch",
    "efflorescence": "Efflorescence (Salt Deposits)",
    "spalling": "Concrete Spalling",
    "rebar_rust": "Rebar Rust Staining",
    "plaster_delamination": "Plaster Delamination",
    "no_defect": "No Defect Detected",
}

# Defect types always reported as structural, whatever the model says.
_STRUCTURAL_ALWAYS = frozenset({"structural_crack", "spalling", "rebar_rust"})

# Defect types reported as structural once severity reaches the threshold.
_STRUCTURAL_BY_SEVERITY = {"settlement_crack": 4}

# Fallback immediate actions used when the model does not supply one.
_DEFAULT_ACTIONS = {
    "structural_crack": "Stop using the affected area and call a licensed structural engineer.",
    "spalling": "Keep clear of the area below and arrange a structural inspection.",
    "rebar_rust": "Arrange urgent structural inspection before repairs.",
    "settlement_crack": "Monitor movement and get a structural engineer’s opinion.",
}

_TRUE_STRINGS = frozenset({"true", "yes", "y", "1"})


def _coerce_bool(value) -> bool:
    """Interpret a model-supplied flag, treating strings like "false" as False."""
    if isinstance(value, str):
        return value.strip().lower() in _TRUE_STRINGS
    return bool(value)


def _coerce_text(value) -> str:
    """Return *value* stripped if it is a string, otherwise an empty string."""
    return value.strip() if isinstance(value, str) else ""


def _normalize_defect_type(value) -> str:
    """Map a model-supplied defect label to a lowercase snake_case key.

    Missing, empty or non-string values become ``"no_defect"``.
    """
    if not isinstance(value, str):
        return "no_defect"
    key = re.sub(r"[\s\-]+", "_", value.strip().lower())
    return key or "no_defect"


def run_gharscan_pipeline(image: Image.Image, language: str = "en", trace_session=None) -> dict:
    """Classify a building defect in *image* and build the full report dict.

    Steps: (1) ask the model to classify the defect, (2) ask it for a
    severity assessment of that defect, (3) look up cost and urgency fields
    via ``build_cost_response``, then merge everything into one report.

    Args:
        image: Input photo; converted to RGB and resized to 448x448.
        language: Accepted for interface compatibility; not read by this
            function.
        trace_session: Optional tracer. When truthy, ``log_step`` is called
            after each step and ``finalize`` is called with the report.

    Returns:
        The report dict combining the classification, severity and cost
        fields.
    """
    _load_model_if_needed()

    # Use the GPU only when one exists, so CPU-only hosts still work.
    use_cuda = torch.cuda.is_available()
    try:
        # Inside the try so a failed move still triggers the cleanup below.
        if use_cuda:
            _model.to("cuda")

        image = image.convert("RGB").resize((448, 448))

        # Step 1: Classify
        cls = _call_vlm(image, _CLASSIFY_PROMPT)
        if not isinstance(cls, dict):
            cls = {}
        defect_type = _normalize_defect_type(cls.get("defect_type", "no_defect"))
        if trace_session:
            trace_session.log_step("classify", {}, cls)

        # Step 2: Severity
        sev = _call_vlm(image, _SEVERITY_PROMPT.format(defect_type=defect_type))
        if not isinstance(sev, dict):
            sev = {}
        raw_sev = sev.get("severity", 2)
        try:
            # Accept ints, floats and numeric strings; clamp to 1..5.
            severity = max(1, min(5, int(float(raw_sev))))
        except (TypeError, ValueError, OverflowError):
            severity = 2
        if trace_session:
            trace_session.log_step("severity", {"defect_type": defect_type}, sev)

        # Step 3: Cost (deterministic)
        cost = build_cost_response(defect_type, severity)
        if trace_session:
            trace_session.log_step(
                "cost", {"defect_type": defect_type, "severity": severity}, cost)

        # The model's flag can only be overridden upwards by the fixed rules.
        is_structural = _coerce_bool(sev.get("is_structural", False))
        if defect_type in _STRUCTURAL_ALWAYS:
            is_structural = True
        threshold = _STRUCTURAL_BY_SEVERITY.get(defect_type)
        if threshold is not None and severity >= threshold:
            is_structural = True

        structural_reasoning = _coerce_text(sev.get("structural_reasoning", ""))
        if not structural_reasoning and is_structural:
            structural_reasoning = "This defect type can indicate structural risk."

        immediate_action = _coerce_text(sev.get("immediate_action", "")) or _DEFAULT_ACTIONS.get(
            defect_type, "Consult a licensed civil / structural engineer."
        )

        report = {
            "analysis_ok": defect_type != "no_defect",
            "defect_type": defect_type,
            "defect_display": _DEFECT_DISPLAY.get(
                defect_type, defect_type.replace("_", " ").title()),
            "description": cls.get("description", ""),
            "primary_cause": cls.get("primary_cause", ""),
            "monsoon_risk": cls.get("monsoon_risk", False),
            "severity": severity,
            "severity_label": cost["severity_label"],
            "severity_color": cost["severity_color"],
            "is_structural": is_structural,
            "structural_reasoning": structural_reasoning,
            "immediate_action": immediate_action,
            "urgency_display": cost["urgency_display"],
            "cost_range_inr": cost["cost_range_inr"],
            "professional_display": cost["professional_display"],
            "requires_engineer": cost["requires_engineer"],
            "disclaimer": cost["disclaimer"],
            "show_liability_banner": cost["show_liability_banner"],
            "liability_text": cost["liability_text"],
        }
        if trace_session:
            trace_session.finalize(report)
        return report
    finally:
        # Move the model back to the CPU and release cached GPU memory.
        if use_cuda:
            _model.to("cpu")
            torch.cuda.empty_cache()