dacanizalesconvers's picture
Add inference script with CLI and Python API
9839534 verified
#!/usr/bin/env python3
"""
Material Surface Classifier — Inference Script
================================================
Classify images of surfaces into 5 material categories:
asphalt · concrete · metal · wood · other
Supports:
- Single image, multiple images, or entire directories
- CLI and programmatic (Python import) usage
- Local model path or Hugging Face Hub model ID
- JSON or human-readable output
- Confidence thresholding
- Batch processing
Requirements:
pip install transformers torch pillow timm
Usage (CLI):
# Single image
python inference.py photo.jpg
# Multiple images
python inference.py img1.jpg img2.png img3.jpg
# Directory of images
python inference.py path/to/image_dir/
# With custom model path (local or Hub)
python inference.py photo.jpg --model models/material_surface
python inference.py photo.jpg --model dacanizalesconvers/material-surface-classifier
# JSON output
python inference.py photo.jpg --json
# With confidence threshold (flag low-confidence predictions)
python inference.py photo.jpg --threshold 0.5
Usage (Python):
from inference import MaterialSurfaceClassifier
clf = MaterialSurfaceClassifier("dacanizalesconvers/material-surface-classifier")
result = clf.predict("photo.jpg")
# => {"label": "concrete", "confidence": 0.94, "scores": {"concrete": 0.94, ...}}
results = clf.predict_batch(["a.jpg", "b.jpg"])
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Union, Optional
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification, pipeline
# ─── Constants ────────────────────────────────────────────────────────────────
# Model identifier used when the caller (API or --model flag) does not supply one.
DEFAULT_MODEL = "dacanizalesconvers/material-surface-classifier"
# The five material categories, in alphabetical order. Not referenced by any
# other code visible in this file.
# NOTE(review): presumably mirrors the model's own label set — confirm against
# the model config before relying on it.
LABELS = ["asphalt", "concrete", "metal", "other", "wood"]
# Lower-case file suffixes (leading dot included) that collect_images() accepts
# as images; it compares against the lower-cased suffix of each path.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"}
# ─── Programmatic API ────────────────────────────────────────────────────────
class MaterialSurfaceClassifier:
    """
    Material surface classifier for programmatic use.

    Wraps a Hugging Face ``image-classification`` pipeline and reshapes its
    output into plain dictionaries.

    Args:
        model_path: Local path or HF Hub model ID.
        device: Device string handed to the pipeline, e.g. "cuda", "cpu", or
            "mps". When None, "cuda" is chosen if ``torch.cuda.is_available()``
            returns True and "cpu" otherwise ("mps" is never picked
            automatically; request it explicitly).

    Example:
        clf = MaterialSurfaceClassifier("dacanizalesconvers/material-surface-classifier")
        result = clf.predict("photo.jpg")
        print(result["label"], result["confidence"])
    """

    def __init__(self, model_path: str = DEFAULT_MODEL, device: Optional[str] = None):
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        self.pipe = pipeline(
            "image-classification",
            model=model_path,
            device=device,
        )

    def predict(self, image: Union[str, Path, Image.Image], top_k: int = 5) -> dict:
        """
        Classify a single image.

        Args:
            image: File path (str/Path) or PIL Image. Paths are opened and
                converted to RGB; PIL images are passed to the pipeline as-is.
            top_k: Number of top predictions to return. Must be at least 1
                (None defers to the pipeline's own default).

        Returns:
            dict with keys:
                label: label of the highest-scoring prediction,
                confidence: its score rounded to 4 decimals,
                scores: mapping of label -> rounded score for every returned
                    prediction.

        Raises:
            TypeError: if ``image`` is not a str, Path, or PIL image.
            ValueError: if ``top_k`` is smaller than 1.
        """
        if isinstance(image, (str, Path)):
            # Image.open() is lazy and keeps the file open (indefinitely for
            # multi-frame formats such as GIF/TIFF). convert() returns an
            # independent in-memory copy, so the handle can be released
            # immediately.
            with Image.open(image) as opened:
                image = opened.convert("RGB")
        elif not isinstance(image, Image.Image):
            raise TypeError(f"Expected str, Path, or PIL.Image, got {type(image)}")

        # A top_k of 0 would yield an empty result list and an opaque
        # IndexError below; reject non-positive values up front.
        if top_k is not None and top_k < 1:
            raise ValueError(f"top_k must be >= 1, got {top_k}")

        raw = self.pipe(image, top_k=top_k)
        best = raw[0]
        return {
            "label": best["label"],
            "confidence": round(best["score"], 4),
            "scores": {r["label"]: round(r["score"], 4) for r in raw},
        }

    def predict_batch(
        self,
        images: list,
        top_k: int = 5,
        threshold: float = 0.0,
    ) -> list:
        """
        Classify a list of images, one at a time.

        A failure on one image does not abort the batch: the exception message
        is recorded under ``error`` for that entry and processing continues.

        Args:
            images: List of file paths (str/Path) or PIL Images.
            top_k: Number of top predictions per image.
            threshold: Minimum confidence to accept a prediction. Entries whose
                (rounded) confidence is below it get ``below_threshold: True``.

        Returns:
            List of dicts in input order, each with: file (if the input was a
            path), label, confidence, scores, and optionally below_threshold
            or error.
        """
        results = []
        for img in images:
            entry = {}
            if isinstance(img, (str, Path)):
                entry["file"] = str(img)
            try:
                result = self.predict(img, top_k=top_k)
                entry.update(result)
                if result["confidence"] < threshold:
                    entry["below_threshold"] = True
            except Exception as e:
                # Deliberately broad: batch mode is best-effort per image, and
                # the failure is surfaced to the caller via the "error" key.
                entry["error"] = str(e)
            results.append(entry)
        return results
# ─── CLI helpers ──────────────────────────────────────────────────────────────
def collect_images(paths: list) -> list:
    """
    Gather image file paths from files and directories.

    Directories are scanned one level deep (no recursion). A path counts as an
    image when it is a regular file whose lower-cased suffix is in
    IMAGE_EXTENSIONS, so ``.JPG`` and ``.Jpg`` are accepted as well as ``.jpg``.

    Args:
        paths: File and/or directory paths (str or Path).

    Returns:
        List of Path objects in input order; the files found inside a
        directory are sorted by path. Inputs that are neither a directory nor
        a recognised image file are skipped with a warning on stderr.
    """
    image_paths = []
    for raw_path in paths:
        p = Path(raw_path)
        if p.is_dir():
            # One pass over the directory with a case-insensitive suffix test.
            # Globbing "*.ext" and "*.EXT" separately returned every file twice
            # on case-insensitive filesystems, missed mixed-case suffixes, and
            # produced an order that depended on set iteration.
            image_paths.extend(
                child
                for child in sorted(p.iterdir())
                if child.is_file() and child.suffix.lower() in IMAGE_EXTENSIONS
            )
        elif p.is_file() and p.suffix.lower() in IMAGE_EXTENSIONS:
            image_paths.append(p)
        else:
            print(f"⚠️ Skipping: {p} (not a recognised image file or directory)",
                  file=sys.stderr)
    return image_paths
def print_results(results: list, as_json: bool = False):
    """
    Print classification results to stdout.

    Args:
        results: List of result dicts. An entry with an ``error`` key is
            reported as a failure; any other entry must provide ``label``,
            ``confidence`` and ``scores``, and may provide ``file`` and
            ``below_threshold``.
        as_json: When True, dump ``results`` as indented JSON and print
            nothing else.
    """
    if as_json:
        print(json.dumps(results, indent=2))
        return

    for r in results:
        if "error" in r:
            print(f"❌ {r.get('file', '?')}: {r['error']}")
            continue

        # The success marker was stored as mis-decoded bytes; it is the
        # check-mark emoji (U+2705).
        icon = "⚠️ " if r.get("below_threshold") else "✅"
        name = r.get("file", "<image>")
        print(f"{icon} {name}")
        print(f" Prediction: {r['label']} ({r['confidence']:.1%})")
        scores_str = " | ".join(
            f"{lbl}: {sc:.1%}" for lbl, sc in r["scores"].items()
        )
        print(f" All scores: {scores_str}")
        print()
# ─── CLI entry-point ─────────────────────────────────────────────────────────
def main():
    """
    Command-line entry point.

    Parses arguments, collects image paths from the given files/directories,
    loads the classifier, classifies every image and prints the results.
    Exits with status 1 when no valid image file is found.

    With ``--json`` stdout carries only the JSON document: the progress lines
    go to stderr and the summary is omitted, so the output can be piped
    straight into a JSON consumer.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Material Surface Classifier — classify images into: "
            "asphalt, concrete, metal, wood, other"
        ),
    )
    parser.add_argument(
        "inputs", nargs="+",
        help="Image file(s) or director(y/ies) to classify",
    )
    parser.add_argument(
        "--model", default=DEFAULT_MODEL,
        help=f"Model path or Hub ID (default: {DEFAULT_MODEL})",
    )
    parser.add_argument(
        "--device", default=None,
        help="Device: cuda / cpu / mps (default: auto-detect)",
    )
    parser.add_argument(
        "--threshold", type=float, default=0.0,
        help="Flag predictions below this confidence (default: 0.0)",
    )
    parser.add_argument(
        "--top-k", type=int, default=5,
        help="Number of top predictions to return (default: 5)",
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output results as JSON",
    )
    args = parser.parse_args()

    # Fail once, up front, instead of reporting the same error for every image.
    if args.top_k < 1:
        parser.error("--top-k must be >= 1")

    # Progress messages must not be mixed into the JSON document on stdout.
    status = sys.stderr if args.json else sys.stdout

    # Discover images
    image_paths = collect_images(args.inputs)
    if not image_paths:
        print("❌ No valid image files found.", file=sys.stderr)
        sys.exit(1)
    print(f"🔍 Found {len(image_paths)} image(s) to classify", file=status)
    print(f"📦 Loading model: {args.model}\n", file=status)

    # Load & run
    clf = MaterialSurfaceClassifier(args.model, device=args.device)
    results = clf.predict_batch(image_paths, top_k=args.top_k, threshold=args.threshold)

    # Output
    print_results(results, as_json=args.json)

    # Summary (human-readable mode only): number of images per predicted label.
    if not args.json:
        from collections import Counter
        preds = [r["label"] for r in results if "error" not in r]
        if preds:
            counts = Counter(preds)
            print("─" * 40)
            print("📊 Summary:")
            for label, count in counts.most_common():
                print(f" {label}: {count} image(s)")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()