GharScan / inference.py
Ritvik Shrivastava
fix: is_structural override
5e70e6d
Raw
History Blame Contribute Delete
7 kB
"""
inference.py — GharScan Qwen2-VL-2B inference pipeline
"""
import re, json, time, torch
from PIL import Image
from loguru import logger
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
from cost_matrix import build_cost_response
# Hugging Face id of the base vision-language model; also used to load the processor.
BASE_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
# Hugging Face id of the LoRA adapter that is merged onto the base model at load time.
LORA_MODEL_ID = "ritvik360/gharscan-qwen2vl-lora"
# Upper bound on the number of tokens generated per VLM call.
MAX_NEW_TOKENS = 256
# Generation temperature; sampling is enabled only when this is > 0.
TEMPERATURE = 0.05
# Lazily-initialised module-level singletons, populated by _load_model_if_needed().
_model = None
_processor = None
def _load_model_if_needed():
    """Load the model and processor once and cache them in module globals.

    The LoRA adapter (``LORA_MODEL_ID``) is loaded on top of the base model
    and merged into it. If anything in that path fails (``peft`` missing,
    adapter download error, ...), the plain base model is used instead and a
    warning is logged.

    The globals ``_model`` and ``_processor`` are only assigned after BOTH
    have loaded successfully, so a failure while loading the processor cannot
    leave the module in a half-initialised state where ``_model`` is set but
    ``_processor`` is still ``None``.

    Raises:
        Whatever ``from_pretrained`` raises if the base model or the
        processor cannot be loaded.
    """
    global _model, _processor
    if _model is not None and _processor is not None:
        return
    logger.info(f"Loading {LORA_MODEL_ID} …")
    t0 = time.monotonic()
    try:
        # Imported lazily so a missing `peft` install degrades to the base model.
        from peft import PeftModel
        base = Qwen2VLForConditionalGeneration.from_pretrained(
            BASE_MODEL_ID, torch_dtype=torch.bfloat16)
        model = PeftModel.from_pretrained(base, LORA_MODEL_ID)
        model = model.merge_and_unload()
        logger.info("LoRA loaded ✅")
    except Exception as e:
        # Deliberate best-effort fallback: any LoRA failure means "use the base model".
        logger.warning(f"LoRA failed ({e}) — using base model")
        model = Qwen2VLForConditionalGeneration.from_pretrained(
            BASE_MODEL_ID, torch_dtype=torch.bfloat16)
    processor = AutoProcessor.from_pretrained(BASE_MODEL_ID)
    model.eval()
    # Publish both singletons together, only after every load step succeeded.
    _model, _processor = model, processor
    logger.info(f"Model ready in {time.monotonic()-t0:.1f}s")
def _parse_json_object(raw: str) -> dict:
    """Best-effort extraction of one JSON object from raw model output.

    Tries the whole string first, then the widest ``{...}`` span inside it
    (covers output wrapped in prose or code fences). Only a JSON *object* is
    accepted; lists, numbers, strings and unparseable text yield ``{}``.
    """
    candidates = [raw]
    match = re.search(r'\{.*\}', raw, re.DOTALL)
    if match:
        candidates.append(match.group())
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError subclass.
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}


def _call_vlm(image: Image.Image, prompt: str) -> dict:
    """Run one image+text turn through the VLM and parse its reply as JSON.

    Args:
        image: The image to show the model.
        prompt: The text instruction sent alongside the image.

    Returns:
        The JSON object decoded from the model's reply, or ``{}`` when the
        reply contains no parseable JSON object. The result is always a
        ``dict`` so callers can safely use ``.get``.

    Requires ``_load_model_if_needed()`` to have been called so that
    ``_model`` and ``_processor`` are populated.
    """
    messages = [{"role": "user", "content": [
        {"type": "image", "image": image},
        {"type": "text", "text": prompt}
    ]}]
    text = _processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True)
    image_inputs, _ = process_vision_info(messages)
    inputs = _processor(
        text=[text], images=image_inputs, return_tensors="pt").to(_model.device)

    # Only pass `temperature` when actually sampling; combining it with
    # greedy decoding is flagged as an inconsistent generation config.
    gen_kwargs = {"max_new_tokens": MAX_NEW_TOKENS}
    if TEMPERATURE > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = TEMPERATURE
    else:
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        out = _model.generate(**inputs, **gen_kwargs)
    # Drop the prompt tokens so only newly generated text is decoded.
    gen = out[0][inputs["input_ids"].shape[1]:]
    raw = _processor.decode(gen, skip_special_tokens=True).strip()
    return _parse_json_object(raw)
# Step-1 prompt: asks the model for a JSON object with defect_type, description,
# primary_cause, monsoon_risk and confidence. Sent verbatim (not passed through
# str.format), so its braces are literal.
_CLASSIFY_PROMPT = """You are GharScan, an expert Indian building inspector.
Analyze this image and return ONLY valid JSON:
{"defect_type":"<hairline_crack|settlement_crack|structural_crack|water_seepage|efflorescence|spalling|rebar_rust|plaster_delamination|no_defect>","description":"<25-word description>","primary_cause":"<1 sentence>","monsoon_risk":<true|false>,"confidence":<0.0-1.0>}"""
# Step-2 prompt: a str.format template with one field, {defect_type}. The doubled
# braces {{ }} are format escapes that render as single literal braces.
_SEVERITY_PROMPT = """You are GharScan. The defect is: {defect_type}.
Return ONLY valid JSON:
{{"severity":<1|2|3|4|5>,"is_structural":<bool>,"structural_reasoning":"<1 sentence>","immediate_action":"<specific action>","urgency_timeline":"<next_renovation|within_6_months|within_1_month|this_week|immediately>"}}"""
# Maps each defect_type code the classifier may emit to its human-readable label.
_DEFECT_DISPLAY = {
    "hairline_crack": "Hairline Plaster Crack",
    "settlement_crack": "Settlement Crack",
    "structural_crack": "Structural Crack",
    "water_seepage": "Water Seepage / Damp Patch",
    "efflorescence": "Efflorescence (Salt Deposits)",
    "spalling": "Concrete Spalling",
    "rebar_rust": "Rebar Rust Staining",
    "plaster_delamination": "Plaster Delamination",
    "no_defect": "No Defect Detected",
}
def _coerce_bool(value, default: bool = False) -> bool:
    """Interpret a JSON-ish value as a bool without treating "false" as truthy."""
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        text = value.strip().lower()
        if text in {"true", "yes", "1"}:
            return True
        if text in {"false", "no", "0", ""}:
            return False
        return default
    if isinstance(value, (int, float)):
        return value != 0
    return default


def _coerce_text(value) -> str:
    """Return ``value`` stripped when it is a string, otherwise an empty string."""
    return value.strip() if isinstance(value, str) else ""


def run_gharscan_pipeline(image: Image.Image, language: str = "en", trace_session=None) -> dict:
    """Run the classify -> severity -> cost pipeline on one image.

    Args:
        image: Input photo; it is converted to RGB and resized to 448x448.
        language: Accepted for interface compatibility; not used by this
            function.
        trace_session: Optional tracer. When truthy, ``log_step`` is called
            after each step and ``finalize`` is called with the final report.

    Returns:
        A report dict combining the classifier output, the clamped severity
        (1-5), the structural assessment and the fields produced by
        ``build_cost_response``.

    Model output is untrusted: missing, ``null`` or wrongly-typed fields fall
    back to defaults instead of raising. The model is moved to CUDA only when
    CUDA is available and is always moved back afterwards.
    """
    _load_model_if_needed()
    use_cuda = torch.cuda.is_available()
    try:
        # Inside the try so a failed device move still triggers the cleanup below.
        if use_cuda:
            _model.to("cuda")
        image = image.convert("RGB").resize((448, 448))

        # Step 1: Classify
        cls = _call_vlm(image, _CLASSIFY_PROMPT)
        if not isinstance(cls, dict):
            cls = {}
        # Normalise case/whitespace; anything that is not a usable string
        # is treated as "no_defect", matching the default for a missing key.
        defect_type = _coerce_text(cls.get("defect_type")).lower() or "no_defect"
        if trace_session:
            trace_session.log_step("classify", {}, cls)

        # Step 2: Severity
        sev = _call_vlm(image, _SEVERITY_PROMPT.format(defect_type=defect_type))
        if not isinstance(sev, dict):
            sev = {}
        raw_sev = sev.get("severity", 2)
        try:
            severity = max(1, min(5, int(float(raw_sev))))
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, null, NaN or infinite severity -> default of 2.
            severity = 2
        if trace_session:
            trace_session.log_step("severity", {"defect_type": defect_type}, sev)

        # Step 3: Cost (deterministic)
        cost = build_cost_response(defect_type, severity)
        if trace_session:
            trace_session.log_step(
                "cost", {"defect_type": defect_type, "severity": severity}, cost)

        # Rule-based overrides: these never downgrade the model's answer,
        # they only force is_structural to True for risky defect types.
        STRUCTURAL_ALWAYS = {"structural_crack", "spalling", "rebar_rust"}
        STRUCTURAL_BY_SEVERITY = {"settlement_crack": 4}
        DEFAULT_ACTIONS = {
            "structural_crack": "Stop using the affected area and call a licensed structural engineer.",
            "spalling": "Keep clear of the area below and arrange a structural inspection.",
            "rebar_rust": "Arrange urgent structural inspection before repairs.",
            "settlement_crack": "Monitor movement and get a structural engineer’s opinion.",
        }

        is_structural = _coerce_bool(sev.get("is_structural", False))
        if defect_type in STRUCTURAL_ALWAYS:
            is_structural = True
        if severity >= STRUCTURAL_BY_SEVERITY.get(defect_type, 999):
            is_structural = True

        structural_reasoning = _coerce_text(sev.get("structural_reasoning"))
        if not structural_reasoning and is_structural:
            structural_reasoning = "This defect type can indicate structural risk."

        immediate_action = _coerce_text(sev.get("immediate_action")) or DEFAULT_ACTIONS.get(
            defect_type,
            "Consult a licensed civil / structural engineer."
        )

        report = {
            "analysis_ok": defect_type != "no_defect",
            "defect_type": defect_type,
            "defect_display": _DEFECT_DISPLAY.get(
                defect_type, defect_type.replace("_", " ").title()),
            "description": cls.get("description", ""),
            "primary_cause": cls.get("primary_cause", ""),
            "monsoon_risk": _coerce_bool(cls.get("monsoon_risk", False)),
            "severity": severity,
            "severity_label": cost["severity_label"],
            "severity_color": cost["severity_color"],
            "is_structural": is_structural,
            "structural_reasoning": structural_reasoning,
            "immediate_action": immediate_action,
            "urgency_display": cost["urgency_display"],
            "cost_range_inr": cost["cost_range_inr"],
            "professional_display": cost["professional_display"],
            "requires_engineer": cost["requires_engineer"],
            "disclaimer": cost["disclaimer"],
            "show_liability_banner": cost["show_liability_banner"],
            "liability_text": cost["liability_text"],
        }
        if trace_session:
            trace_session.finalize(report)
        return report
    finally:
        # Move the model back off the GPU between requests and release cached blocks.
        if use_cuda:
            _model.to("cpu")
            torch.cuda.empty_cache()