Spaces:

achase25
/

DogBreedID

Sleeping

App Files Files Community

DogBreedID / app.py

achase25

Update app.py

30a588a verified 5 months ago

raw

history blame contribute delete

9.22 kB

	import difflib
	import re
	from typing import Dict, Optional, Tuple

	import gradio as gr
	import torch
	import pandas as pd
	from PIL import Image
	from transformers import AutoImageProcessor, AutoModelForImageClassification

	# Hugging Face model id passed to both `from_pretrained` calls below.
	MODEL_ID = "valentinocc/dog-breed-classifier"
	# CSV file holding the AKC breed table; it is read once at import time.
	AKC_CSV_PATH = "akc-data-latest.csv"
	# NOTE(review): not referenced anywhere else in the visible code — confirm
	# whether this path is still needed.
	DOG_LABELS_PATH = "dogmodelbreedlist.json"


	# -----------Load model + processor-----------------------
	# Pick the compute device first (GPU when CUDA is available, CPU otherwise),
	# then load the image preprocessor and the classifier for MODEL_ID.
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
	processor = AutoImageProcessor.from_pretrained(MODEL_ID)
	model = AutoModelForImageClassification.from_pretrained(MODEL_ID)
	model.to(device)
	# The app only runs inference, so keep the model in evaluation mode.
	model.eval()

	# ---------------Data Cleaning Helpers--------------------------------
	def _normalize_name(s: str) -> str:
	#Lowercase, strip non-alphanumerics, collapse spaces
	s = s.lower().strip()
	s = re.sub(r"[^a-z0-9\s]", " ", s)
	s = re.sub(r"\s+", " ", s)
	return s

	def _load_akc_table(path: str) -> Tuple[pd.DataFrame, Dict[str, int]]:
	#Load AKC CSV + build a name->row_index map using a normalized breed name.

	df = pd.read_csv(path)
	name_col = "Unnamed: 0"
	if name_col not in df.columns:
	for c in df.columns:
	if df[c].dtype == "object":
	name_col = c
	break

	# Make a clean 'breed' column for display and mapping
	df = df.rename(columns={name_col: "breed"})
	df["breed"] = df["breed"].astype(str)

	# Build normalized name -> row index map
	index_map: Dict[str, int] = {}
	for idx, name in enumerate(df["breed"].tolist()):
	index_map[_normalize_name(name)] = idx

	return df, index_map

	# Load the AKC table once at import time: `akc_df` is the breed table and
	# `akc_name_to_idx` maps a normalized breed name to its row position.
	akc_df, akc_name_to_idx = _load_akc_table(AKC_CSV_PATH)


	# ------------------Alias rules----------------------------------------

	# 1) Direct alias corrections, used when a predicted label does not match an
	#    AKC breed name as-is. Keys are labels in normalized form (lowercase,
	#    punctuation replaced by spaces); values are AKC display names, which
	#    are normalized again before being looked up in the AKC index.
	ALIAS_DIRECT: Dict[str, str] = {
	    # Poodles (AKC rows are usually written with parentheses)
	    "standard poodle": "Poodle (Standard)",
	    "miniature poodle": "Poodle (Miniature)",
	    "toy poodle": "Poodle (Toy)",

	    # Dachshund sizes often appear both ways in the wild
	    "miniature dachshund": "Dachshund (Miniature)",
	    "standard dachshund": "Dachshund (Standard)", # present in some AKC tables

	    # Bull Terrier miniature vs. base
	    "miniature bull terrier": "Bull Terrier (Miniature)",

	    # American Eskimo Dog varieties, with and without the trailing "dog"
	    "toy american eskimo": "American Eskimo Dog (Toy)",
	    "miniature american eskimo": "American Eskimo Dog (Miniature)",
	    "standard american eskimo": "American Eskimo Dog (Standard)",
	    "toy american eskimo dog": "American Eskimo Dog (Toy)",
	    "miniature american eskimo dog": "American Eskimo Dog (Miniature)",
	    "standard american eskimo dog": "American Eskimo Dog (Standard)",

	    # Others: spelling/hyphenation differences and shortened names
	    "eskimo dog": "American Eskimo Dog",
	    "wire haired fox terrier": "Fox Terrier (Wire)",
	    "smooth fox terrier": "Fox Terrier (Smooth)",
	    "black and tan coonhound": "Black and Tan Coonhound",
	    "german short haired pointer": "German Shorthaired Pointer",
	    "german long haired pointer": "German Longhaired Pointer",
	    "curly coated retriever": "Curly-Coated Retriever",
	    "flat coated retriever": "Flat-Coated Retriever",
	    "yorkshire terrier": "Yorkshire Terrier",
	    "welsh springer spaniel": "Welsh Springer Spaniel",
	    "english springer": "English Springer Spaniel",
	}

	# 2) Generic flip: "<Variant> <Base>" -> "<Base> (<Variant>)"
	#    The flip is attempted only when the label's first word is one of these
	#    variants, and it is accepted only if the flipped name exists in the
	#    AKC index.
	SIZE_VARIANTS = {"standard", "miniature", "toy", "giant"}

	def _try_alias_then_flip(norm_label: str) -> Optional[pd.Series]:
	    """Resolve a normalized label via the alias table, then via a size flip.

	    Args:
	        norm_label: A label already passed through ``_normalize_name``.

	    Returns:
	        The matching AKC row, or ``None`` when neither the alias nor the
	        flipped form is present in the AKC index.
	    """
	    # a) Direct alias table: accepted only if its target is a known AKC row.
	    alias_display = ALIAS_DIRECT.get(norm_label)
	    if alias_display is not None:
	        alias_pos = akc_name_to_idx.get(_normalize_name(alias_display))
	        if alias_pos is not None:
	            return akc_df.iloc[alias_pos]

	    # b) Generic flip: "<variant> <rest>" -> "<rest> (<variant>)", accepted
	    #    only if that name exists in the AKC index.
	    variant, separator, base = norm_label.partition(" ")
	    if not separator or variant not in SIZE_VARIANTS:
	        return None

	    flipped_key = _normalize_name(f"{base.title()} ({variant.title()})")
	    flipped_pos = akc_name_to_idx.get(flipped_key)
	    if flipped_pos is None:
	        return None
	    return akc_df.iloc[flipped_pos]

	# ------------------Lookup in AKC table------------------------------------------

	def _lookup_breed_info(pred_label: str) -> Optional[pd.Series]:
	    """Return the AKC row that best matches a model label, or ``None``.

	    Strategies are tried in order and the first hit wins:
	      1. exact match on the normalized label;
	      2. alias table / size-variant flip
	         ('Standard Poodle' -> 'Poodle (Standard)');
	      3. the label with a trailing 'dog', 'terrier' or 'hound' removed;
	      4. closest normalized AKC name according to difflib (cutoff 0.75).
	    """
	    key = _normalize_name(pred_label)

	    def row_at(name: str) -> Optional[pd.Series]:
	        # Row for an exact normalized name, or None when it is not indexed.
	        position = akc_name_to_idx.get(name)
	        return None if position is None else akc_df.iloc[position]

	    # 1) direct match
	    match = row_at(key)
	    if match is not None:
	        return match

	    # 2) alias + safe flip
	    match = _try_alias_then_flip(key)
	    if match is not None:
	        return match

	    # 3) simple stripped variants (the unchanged label was already tried)
	    for suffix_pattern in (r"\bdog\b$", r"\bterrier\b$", r"\bhound\b$"):
	        match = row_at(re.sub(suffix_pattern, "", key).strip())
	        if match is not None:
	            return match

	    # 4) fuzzy match
	    closest = difflib.get_close_matches(key, akc_name_to_idx.keys(), n=1, cutoff=0.75)
	    if not closest:
	        return None
	    return akc_df.iloc[akc_name_to_idx[closest[0]]]

	def _format_breed_info(row: pd.Series) -> str:
	#Turn a single AKC row into a readable markdown snippet.
	def get(col, fallback="—"):
	return row[col] if col in row and pd.notna(row[col]) else fallback

	lines = []
	lines.append(f"### {get('breed', 'Unknown Breed')}")
	if pd.notna(get('description')):
	lines.append(f"{get('description')}\n")

	# Facts block
	facts = []
	if pd.notna(get('group')):
	facts.append(f"Group: {get('group')}")
	if pd.notna(get('temperament')):
	facts.append(f"Temperament: {get('temperament')}")
	# Height (inches)
	hmin, hmax = get('min_height'), get('max_height')
	if pd.notna(hmin) or pd.notna(hmax):
	facts.append(f"Height: {hmin if pd.notna(hmin) else '—'}–{hmax if pd.notna(hmax) else '—'} in")
	# Weight (pounds)
	wmin, wmax = get('min_weight'), get('max_weight')
	if pd.notna(wmin) or pd.notna(wmax):
	facts.append(f"Weight: {wmin if pd.notna(wmin) else '—'}–{wmax if pd.notna(wmax) else '—'} lb")
	# Life Expectancy (years)
	emin, emax = get('min_expectancy'), get('max_expectancy')
	if pd.notna(emin) or pd.notna(emax):
	facts.append(f"Life Expectancy: {emin if pd.notna(emin) else '—'}–{emax if pd.notna(emax) else '—'} yrs")

	if facts:
	lines.append("\n".join(facts))

	# Optional traits if present in our AKC Dataset
	trait_fields = [
	("grooming_frequency_category", "Grooming"),
	("shedding_category", "Shedding"),
	("energy_level_category", "Energy Level"),
	("trainability_category", "Trainability"),
	("demeanor_category", "Demeanor"),
	]
	traits = []
	for col, label in trait_fields:
	val = get(col)
	if pd.notna(val):
	traits.append(f"- {label}: {val}")
	if traits:
	lines.append("\nTraits")
	lines.extend(traits)

	return "\n\n".join(lines).strip()

	# ---------------------Inference function----------------
	# Accepts a PIL image and returns:
	# - Top-1 predicted breed with confidence
	# - A markdown block of AKC info for that breed (if found)
	def predict_with_info(img: Image.Image) -> str:
	    """Classify a dog photo and describe the predicted breed.

	    Args:
	        img: The picture to classify as a PIL image, or ``None`` when no
	            image was provided.

	    Returns:
	        Markdown text. Normally a "Prediction: <label> (<confidence>)" line
	        followed by the AKC info block for that breed, or by a note that no
	        matching breed was found. When ``img`` is ``None`` a short prompt to
	        upload a photo is returned instead.
	    """
	    # Without an image there is nothing to preprocess; answer with a hint
	    # rather than letting the processor fail on None.
	    if img is None:
	        return "_Please upload a dog photo first._"

	    inputs = processor(images=img, return_tensors="pt").to(device)
	    with torch.inference_mode():
	        logits = model(**inputs).logits
	        probs = torch.softmax(logits, dim=-1)
	        # Top-1 class id and its probability for the single input image.
	        top_id = int(torch.argmax(probs, dim=-1).item())
	        top_prob = float(probs[0, top_id].item())

	    label = model.config.id2label.get(top_id, "Unknown")
	    header = f"Prediction: {label} ({top_prob:.2%})"

	    row = _lookup_breed_info(label)
	    if row is None:
	        return header + "\n\n_No matching breed found in AKC dataset._"

	    info_md = _format_breed_info(row)
	    return header + "\n\n" + info_md


	# -------------------- UI -------------------------------------

	# Single-function UI: one uploaded image (delivered to the callback as a PIL
	# image) in, one Markdown block out.
	demo = gr.Interface(
	    fn=predict_with_info,
	    inputs=gr.Image(type="pil", label="Upload a dog photo"),
	    outputs=gr.Markdown(label="Prediction + Breed Info"),
	    title="Dog Breed Classifier + AKC Info",
	    description=(
	        f"Upload an image of a dog. The app predicts the breed using '{MODEL_ID}' "
	        "and shows breed details from the American Kennel Club dataset. Dataset: https://github.com/tmfilho/akcdata/blob/master/data/akc-data-latest.csv"
	    ),
	    # NOTE(review): newer Gradio releases appear to replace `allow_flagging`
	    # with `flagging_mode` — confirm against the Gradio version this Space pins.
	    allow_flagging="never",
	)

	# Launch the Gradio app only when this file is executed as a script, not
	# when it is imported as a module.
	if __name__ == "__main__":
	    demo.launch()