drywall-qa-clipseg / src /predict.py
youngPhilosopher's picture
Upload folder using huggingface_hub
b891e61 verified
"""Standalone single-image inference for CLIPSeg."""
import argparse
from pathlib import Path
import numpy as np
import torch
import yaml
from PIL import Image
from src.model.clipseg_wrapper import load_model_and_processor
from src.train import get_device
# Project root: the parent of the directory that contains this file. Config,
# checkpoint and default mask output paths below are resolved relative to it.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
def _prompt_slug(prompt: str) -> str:
    """Return *prompt* reduced to characters that are safe inside a file name."""
    # Everything except letters, digits, "-", "_" and "." becomes "_". Spaces
    # map to "_" as before; path separators and other special characters can
    # no longer redirect or invalidate the default output path.
    return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in prompt)


def predict(image_path: str, prompt: str, config_path: str | None = None, output_path: str | None = None):
    """Segment a single image with a text prompt and save the binary mask.

    Args:
        image_path: Path of the image to segment.
        prompt: Text prompt handed to the processor together with the image.
        config_path: YAML config file. Defaults to
            ``configs/train_config.yaml`` under ``PROJECT_ROOT``.
        output_path: Where to write the mask. When ``None`` the mask goes to
            ``outputs/masks/<image stem>__<prompt slug>.png`` under
            ``PROJECT_ROOT``.

    Returns:
        The mask as a mode-"L" PIL image (pixels are 0 or 255), resized to the
        dimensions of the input image.

    Raises:
        FileNotFoundError: If the config file or the image cannot be opened.
        KeyError: If the config lacks the ``model`` / ``evaluation`` entries
            read below.
    """
    config_path = config_path or str(PROJECT_ROOT / "configs" / "train_config.yaml")
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # Decode the image before the model is built so that a bad image path
    # fails fast, and release the file handle as soon as the pixels are read.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    orig_w, orig_h = image.size

    device = get_device()
    model, processor = load_model_and_processor(config["model"]["name"], config["model"]["freeze_backbone"])
    ckpt = PROJECT_ROOT / "outputs" / "checkpoints" / "best_model.pt"
    # Weights are deserialized on CPU; the model is moved to `device` afterwards.
    model.load_state_dict(torch.load(ckpt, map_location="cpu", weights_only=True))
    model = model.to(device).eval()

    inputs = processor(text=[prompt], images=[image], return_tensors="pt", padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        logits = model(**inputs).logits

    # NOTE(review): accepts logits with or without a leading batch axis, so a
    # 2-D (H, W) result is not sliced down to a single row. Confirm which
    # shape the wrapped model actually returns for a one-sample batch.
    sample_logits = logits[0] if logits.dim() == 3 else logits
    threshold = config["evaluation"]["threshold"]
    pred = (torch.sigmoid(sample_logits) > threshold).cpu().numpy().astype(np.uint8)
    # Nearest-neighbour resampling keeps the mask strictly binary (0 / 255).
    mask = Image.fromarray(pred * 255, mode="L").resize((orig_w, orig_h), Image.NEAREST)

    if output_path is None:
        stem = Path(image_path).stem
        output_path = str(PROJECT_ROOT / "outputs" / "masks" / f"{stem}__{_prompt_slug(prompt)}.png")
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    mask.save(output_path)
    print(f"Saved mask to {output_path}")
    return mask
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run one prediction.
    parser = argparse.ArgumentParser(description="Standalone single-image inference for CLIPSeg.")
    parser.add_argument("image", help="Path to input image")
    parser.add_argument("prompt", help="Text prompt, e.g. 'segment crack'")
    # Optional; when omitted, predict() falls back to its default config file.
    parser.add_argument("--config", help="Path to YAML config file")
    parser.add_argument("--output", help="Output mask path")
    args = parser.parse_args()
    predict(args.image, args.prompt, config_path=args.config, output_path=args.output)