Spaces:

build-small-hackathon
/

Forager-Field-Notes

Running

App Files Files Community

Forager-Field-Notes / pipeline /convergence.py

HomesteaderLabs

Deploy Forager's Field Station

0e3ea0a verified 3 days ago

raw

history blame contribute delete

4.7 kB

	"""
	convergence.py — Turn an infer.Pipeline result into a single ForagerResult.

	Ported from forager_ml with the Hailo coupling removed: instead of a
	RawPrediction it takes the plain dict returned by infer.Pipeline.identify().
	The safety-first philosophy is unchanged — abstain by default, flag DEADLY
	prominently, never present a below-threshold guess as an identification.
	"""

	from dataclasses import dataclass

	from .metadata import SPECIES_METADATA, UNKNOWN_META

	# Committed results whose confidence falls below this are flagged
	# low_confidence.
	LOW_CONFIDENCE_THRESHOLD = 0.50

	# A committed (non-deadly) ID must clear this.
	# These experts are accurate but underconfident on SAFE classes
	# (avg ~0.5–0.6); 0.60 balances decisiveness (75% safe-correct) against
	# residual deadly-as-safe. The UX never treats SAFE as a green light —
	# see app.py — so the gate is a usability dial, not the safety mechanism.
	EXPERT_CONFIDENCE_THRESHOLD = 0.60

	# A DEADLY call at/above this overrides any higher-confidence SAFE/CAUTION
	# call from another expert (safety-biased arbitration).
	DEADLY_VETO_FLOOR = 0.40


	@dataclass
	class ForagerResult:
	    """Single identification outcome built from one pipeline call.

	    A result is either committed (``abstained`` is False and ``species`` is a
	    class key) or abstained (``species`` is "unknown" and ``reason`` says
	    why). Field order is part of the constructor interface.
	    """

	    domain: str
	    species: str            # class key, or "unknown"
	    scientific_name: str
	    confidence: float
	    safety: str             # SAFE | CAUTION | DEADLY | UNKNOWN
	    lookalike: str
	    key_diff: str
	    low_confidence: bool
	    expert_model: str
	    abstained: bool
	    reason: str             # why we abstained (or "" when committed)

	    @property
	    def is_deadly(self) -> bool:
	        """True only for a DEADLY result that is not flagged low_confidence."""
	        # NOTE(review): DEADLY_VETO_FLOOR (0.40) is below
	        # LOW_CONFIDENCE_THRESHOLD (0.50), so a DEADLY result committed with
	        # confidence in [0.40, 0.50) reports False here. Confirm that callers
	        # which must surface every DEADLY call read `safety` directly.
	        if self.low_confidence:
	            return False
	        return self.safety == "DEADLY"

	    @property
	    def is_unknown(self) -> bool:
	        """True when no species was named (``species`` is "unknown")."""
	        named = self.species != "unknown"
	        return not named


	# User-facing explanation for each abstain reason code. _abstain() places the
	# matching text in ForagerResult.key_diff and falls back to
	# UNKNOWN_META["key_diff"] for a code that is not listed here.
	_ABSTAIN_REASON = {
	    "uncertain_domain": "Couldn't confidently place this in a known domain.",
	    "off_domain": "This doesn't look like anything in the trained domains.",
	    "low_confidence": "Leaning toward an answer, but not confident enough to commit.",
	}


	def _abstain(domain: str, reason: str, conf: float = 0.0, expert: str = "none") -> ForagerResult:
	    """Build the "unknown" result returned when no identification is made.

	    Args:
	        domain: Domain label to carry through to the result.
	        reason: Abstain reason code; stored verbatim in ``reason`` and used to
	            pick the explanation text placed in ``key_diff``.
	        conf: Confidence to report alongside the abstention.
	        expert: Name of the expert the confidence came from.

	    Returns:
	        A ForagerResult with species "unknown", safety "UNKNOWN",
	        ``low_confidence`` and ``abstained`` both True.
	    """
	    placeholder_name = UNKNOWN_META["scientific"]
	    placeholder_lookalike = UNKNOWN_META["lookalike"]
	    # Unlisted reason codes fall back to the generic unknown-species text.
	    explanation = _ABSTAIN_REASON.get(reason, UNKNOWN_META["key_diff"])

	    return ForagerResult(
	        domain=domain,
	        species="unknown",
	        scientific_name=placeholder_name,
	        confidence=conf,
	        safety="UNKNOWN",
	        lookalike=placeholder_lookalike,
	        key_diff=explanation,
	        low_confidence=True,
	        expert_model=expert,
	        abstained=True,
	        reason=reason,
	    )


	def _commit(domain: str, c: dict) -> ForagerResult:
	    """Build a committed (non-abstained) result from one expert call.

	    Args:
	        domain: Domain label to carry through to the result.
	        c: Expert call; its "species", "confidence" and "expert" keys are read.

	    Returns:
	        A ForagerResult populated from the species' metadata entry, or from
	        UNKNOWN_META when the species has no entry. ``low_confidence`` is set
	        when the confidence is below LOW_CONFIDENCE_THRESHOLD.
	    """
	    label = c["species"]
	    info = SPECIES_METADATA.get(label, UNKNOWN_META)
	    score = float(c["confidence"])
	    is_shaky = score < LOW_CONFIDENCE_THRESHOLD

	    return ForagerResult(
	        domain=domain,
	        species=label,
	        scientific_name=info["scientific"],
	        confidence=score,
	        safety=info["safety"],
	        lookalike=info["lookalike"],
	        key_diff=info["key_diff"],
	        low_confidence=is_shaky,
	        expert_model=c["expert"],
	        abstained=False,
	        reason="",
	    )


	def build_result(call: dict) -> ForagerResult:
	    """
	    Turn the dict from infer.Pipeline.identify() into one ForagerResult.

	    Safety-biased arbitration across the domain's experts:
	    1. If ANY expert flags a DEADLY species at >= DEADLY_VETO_FLOOR, surface
	       that — a deadly vote vetoes a more-confident SAFE/CAUTION call from an
	       off-domain expert (prevents e.g. hemlock -> "ramps" because highvalue
	       is more confident than medicinals).
	    2. Otherwise take the highest-confidence call, abstaining if it can't
	       clear EXPERT_CONFIDENCE_THRESHOLD.

	    Args:
	        call: Pipeline output. Reads "domain", "abstain", "reason",
	            "confidence", "expert" and "calls"; each entry of "calls" must
	            provide "species", "confidence" and "expert".

	    Returns:
	        A committed ForagerResult, or an abstained one when there are no
	        calls or the best call is below the threshold.

	    Side effects:
	        Each dict in ``call["calls"]`` gains a "safety" key (in place).
	    """
	    import math  # function scope: only the NaN guard below needs it

	    domain = call.get("domain", "unknown")

	    # Upstream already abstained and supplied no per-expert calls: there is
	    # nothing to arbitrate, so pass its reason/confidence straight through.
	    if call.get("abstain") and "calls" not in call:
	        return _abstain(
	            domain,
	            call.get("reason", "low_confidence"),
	            float(call.get("confidence", 0.0)),
	            call.get("expert", "none"),
	        )

	    calls = call.get("calls", [])
	    if not calls:
	        return _abstain(domain, "low_confidence")

	    def score(c: dict) -> float:
	        # Every ordering comparison with NaN is False, so a NaN confidence
	        # would slip past the `<` threshold gate and could win max()
	        # depending on list order. Rank it as zero confidence instead.
	        value = float(c["confidence"])
	        return 0.0 if math.isnan(value) else value

	    # NOTE(review): this annotates the caller's dicts in place. Kept because
	    # code outside this module may read the "safety" key — confirm.
	    for c in calls:
	        c["safety"] = SPECIES_METADATA.get(c["species"], UNKNOWN_META)["safety"]

	    # Step 1: any sufficiently confident DEADLY call wins outright.
	    deadly = [c for c in calls if c["safety"] == "DEADLY" and score(c) >= DEADLY_VETO_FLOOR]
	    if deadly:
	        return _commit(domain, max(deadly, key=score))

	    # Step 2: otherwise the most confident call must clear the commit gate.
	    best = max(calls, key=score)
	    best_conf = score(best)
	    if best_conf < EXPERT_CONFIDENCE_THRESHOLD:
	        # Pass a plain float, matching what _commit and the early abstain
	        # path store in ForagerResult.confidence.
	        return _abstain(domain, "low_confidence", best_conf, best["expert"])
	    return _commit(domain, best)