Add inference script with CLI and Python API

9839534 verified about 1 month ago

8.59 kB

	#!/usr/bin/env python3
	"""
	Material Surface Classifier — Inference Script
	================================================
	Classify images of surfaces into 5 material categories:
	asphalt · concrete · metal · wood · other

	Supports:
	- Single image, multiple images, or entire directories
	- CLI and programmatic (Python import) usage
	- Local model path or Hugging Face Hub model ID
	- JSON or human-readable output
	- Confidence thresholding
	- Batch processing

	Requirements:
	pip install transformers torch pillow timm

	Usage (CLI):
	# Single image
	python inference.py photo.jpg

	# Multiple images
	python inference.py img1.jpg img2.png img3.jpg

	# Directory of images
	python inference.py path/to/image_dir/

	# With custom model path (local or Hub)
	python inference.py photo.jpg --model models/material_surface
	python inference.py photo.jpg --model dacanizalesconvers/material-surface-classifier

	# JSON output
	python inference.py photo.jpg --json

	# With confidence threshold (flag low-confidence predictions)
	python inference.py photo.jpg --threshold 0.5

	Usage (Python):
	from inference import MaterialSurfaceClassifier

	clf = MaterialSurfaceClassifier("dacanizalesconvers/material-surface-classifier")
	result = clf.predict("photo.jpg")
	# => {"label": "concrete", "confidence": 0.94, "scores": {"concrete": 0.94, ...}}

	results = clf.predict_batch(["a.jpg", "b.jpg"])
	"""

	import argparse
	import json
	import sys
	from pathlib import Path
	from typing import Union, Optional

	import torch
	from PIL import Image
	from transformers import AutoImageProcessor, AutoModelForImageClassification, pipeline


	# ─── Constants ────────────────────────────────────────────────────────────────
	# Model used when the caller gives none: the default for
	# MaterialSurfaceClassifier(model_path=...) and for the CLI --model option.
	DEFAULT_MODEL = "dacanizalesconvers/material-surface-classifier"
	# Category names in alphabetical order. Not referenced by the code visible in
	# this file; presumably mirrors the model's label order — TODO confirm.
	LABELS = ["asphalt", "concrete", "metal", "other", "wood"]
	# Lower-case file suffixes that collect_images() accepts as images.
	IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"}


	# ─── Programmatic API ────────────────────────────────────────────────────────
	class MaterialSurfaceClassifier:
	    """
	    Material surface classifier for programmatic use.

	    Thin wrapper around a Hugging Face ``image-classification`` pipeline.

	    Args:
	        model_path: Local path or HF Hub model ID.
	        device: "cuda", "cpu", or "mps". When None, "cuda" is chosen if
	            ``torch.cuda.is_available()`` and "cpu" otherwise; "mps" is
	            never auto-selected and must be requested explicitly.

	    Example:
	        clf = MaterialSurfaceClassifier("dacanizalesconvers/material-surface-classifier")
	        result = clf.predict("photo.jpg")
	        print(result["label"], result["confidence"])
	    """

	    def __init__(self, model_path: str = DEFAULT_MODEL, device: Optional[str] = None):
	        if device is None:
	            device = "cuda" if torch.cuda.is_available() else "cpu"
	        self.device = device
	        self.pipe = pipeline(
	            "image-classification",
	            model=model_path,
	            device=device,
	        )

	    def predict(self, image: Union[str, Path, Image.Image], top_k: int = 5) -> dict:
	        """
	        Classify a single image.

	        Args:
	            image: File path (str/Path) or PIL Image.
	            top_k: Number of top predictions to return (must be >= 1).

	        Returns:
	            dict with keys: label, confidence, scores. ``label`` and
	            ``confidence`` describe the first (best) pipeline result;
	            ``scores`` maps every returned label to its score. Scores are
	            rounded to 4 decimal places.

	        Raises:
	            ValueError: If ``top_k`` is less than 1.
	            TypeError: If ``image`` is not a str, Path, or PIL Image.
	        """
	        # Without at least one prediction there is no "best" label to report.
	        if top_k is not None and top_k < 1:
	            raise ValueError(f"top_k must be >= 1, got {top_k}")

	        if isinstance(image, (str, Path)):
	            # convert() returns an independent in-memory image, so the file
	            # can be closed right away instead of waiting for the GC
	            # (multi-frame formats such as GIF/TIFF otherwise stay open).
	            with Image.open(image) as opened:
	                image = opened.convert("RGB")
	        elif not isinstance(image, Image.Image):
	            raise TypeError(f"Expected str, Path, or PIL.Image, got {type(image)}")

	        raw = self.pipe(image, top_k=top_k)
	        best = raw[0]
	        return {
	            "label": best["label"],
	            "confidence": round(best["score"], 4),
	            "scores": {r["label"]: round(r["score"], 4) for r in raw},
	        }

	    def predict_batch(
	        self,
	        images: list,
	        top_k: int = 5,
	        threshold: float = 0.0,
	    ) -> list:
	        """
	        Classify a list of images, one at a time.

	        A failure on one image does not abort the batch: the exception
	        message is stored under ``error`` in that image's entry.

	        Args:
	            images: List of file paths (str/Path) or PIL Images.
	            top_k: Number of top predictions per image.
	            threshold: Minimum confidence to accept a prediction.

	        Returns:
	            List of dicts, each with: file (if path), label, confidence, scores,
	            and optionally below_threshold or error.
	        """
	        results = []
	        for img in images:
	            entry = {}
	            if isinstance(img, (str, Path)):
	                entry["file"] = str(img)
	            try:
	                result = self.predict(img, top_k=top_k)
	            except Exception as e:
	                # Deliberate best-effort: report the failure per image.
	                entry["error"] = str(e)
	            else:
	                entry.update(result)
	                if result["confidence"] < threshold:
	                    entry["below_threshold"] = True
	            results.append(entry)
	        return results


	# ─── CLI helpers ──────────────────────────────────────────────────────────────
	def collect_images(paths: list, extensions: Optional[set] = None) -> list:
	    """Gather image file paths from files and directories.

	    Directories are scanned non-recursively; their matching files are
	    returned sorted by path. Suffix matching is case-insensitive for both
	    directory entries and explicitly named files, and each file is returned
	    once per directory scan regardless of filesystem case sensitivity.

	    Args:
	        paths: File and/or directory paths (str or Path).
	        extensions: Suffixes (with leading dot) to accept. Defaults to
	            IMAGE_EXTENSIONS when None.

	    Returns:
	        List of Path objects, in the order the inputs were given.
	        Inputs that are neither a directory nor an accepted image file are
	        skipped with a warning on stderr.
	    """
	    if extensions is None:
	        allowed = IMAGE_EXTENSIONS
	    else:
	        allowed = {ext.lower() for ext in extensions}

	    image_paths = []
	    for raw in paths:
	        p = Path(raw)
	        if p.is_dir():
	            # One pass over the directory: globbing "*.jpg" and "*.JPG"
	            # separately duplicates entries on case-insensitive filesystems
	            # and misses mixed-case suffixes such as ".Jpg".
	            image_paths.extend(
	                sorted(
	                    child
	                    for child in p.iterdir()
	                    if child.is_file() and child.suffix.lower() in allowed
	                )
	            )
	        elif p.is_file() and p.suffix.lower() in allowed:
	            image_paths.append(p)
	        else:
	            print(f"⚠️ Skipping: {p} (not a recognised image file or directory)",
	                  file=sys.stderr)
	    return image_paths


	def print_results(results: list, as_json: bool = False) -> None:
	    """Pretty-print classification results to stdout.

	    Args:
	        results: Result dicts as produced by
	            MaterialSurfaceClassifier.predict_batch (keys: file, label,
	            confidence, scores, and optionally below_threshold or error).
	        as_json: When True, dump ``results`` as indented JSON and print
	            nothing else.
	    """
	    if as_json:
	        print(json.dumps(results, indent=2))
	        return

	    for r in results:
	        if "error" in r:
	            print(f"❌ {r.get('file', '?')}: {r['error']}")
	            continue

	        icon = "⚠️ " if r.get("below_threshold") else "✅"
	        name = r.get("file", "<image>")
	        print(f"{icon} {name}")
	        print(f" Prediction: {r['label']} ({r['confidence']:.1%})")
	        # Plain " | " separator: the previous "\|" was an invalid escape
	        # sequence that printed a literal backslash between scores.
	        scores_str = " | ".join(
	            f"{lbl}: {sc:.1%}" for lbl, sc in r["scores"].items()
	        )
	        print(f" All scores: {scores_str}")
	        print()


	# ─── CLI entry-point ─────────────────────────────────────────────────────────
	def main(argv: Optional[list] = None):
	    """Command-line entry point.

	    Parses arguments, collects image paths, classifies them and prints the
	    results, followed by a per-label summary in human-readable mode.

	    Args:
	        argv: Argument list to parse; None means ``sys.argv[1:]``.

	    Exits with status 1 when no valid image files are found.
	    """
	    parser = argparse.ArgumentParser(
	        description=(
	            "Material Surface Classifier — classify images into: "
	            "asphalt, concrete, metal, wood, other"
	        ),
	    )
	    parser.add_argument(
	        "inputs", nargs="+",
	        help="Image file(s) or director(y/ies) to classify",
	    )
	    parser.add_argument(
	        "--model", default=DEFAULT_MODEL,
	        help=f"Model path or Hub ID (default: {DEFAULT_MODEL})",
	    )
	    parser.add_argument(
	        "--device", default=None,
	        help="Device: cuda / cpu / mps (default: auto-detect)",
	    )
	    parser.add_argument(
	        "--threshold", type=float, default=0.0,
	        help="Flag predictions below this confidence (default: 0.0)",
	    )
	    parser.add_argument(
	        "--top-k", type=int, default=5,
	        help="Number of top predictions to return (default: 5)",
	    )
	    parser.add_argument(
	        "--json", action="store_true",
	        help="Output results as JSON",
	    )

	    args = parser.parse_args(argv)

	    # Discover images
	    image_paths = collect_images(args.inputs)
	    if not image_paths:
	        print("❌ No valid image files found.", file=sys.stderr)
	        sys.exit(1)

	    # In JSON mode stdout must contain nothing but the JSON document, so
	    # progress messages are routed to stderr instead.
	    status_stream = sys.stderr if args.json else sys.stdout
	    print(f"🔍 Found {len(image_paths)} image(s) to classify", file=status_stream)
	    print(f"📦 Loading model: {args.model}\n", file=status_stream)

	    # Load & run
	    clf = MaterialSurfaceClassifier(args.model, device=args.device)
	    results = clf.predict_batch(image_paths, top_k=args.top_k, threshold=args.threshold)

	    # Output
	    print_results(results, as_json=args.json)

	    # Summary (human-readable mode only; failed images are excluded)
	    if not args.json:
	        from collections import Counter
	        preds = [r["label"] for r in results if "error" not in r]
	        if preds:
	            counts = Counter(preds)
	            print("─" * 40)
	            print("📊 Summary:")
	            for label, count in counts.most_common():
	                print(f" {label}: {count} image(s)")


	if __name__ == "__main__":
	    main()