Forager-Field-Notes / pipeline /convergence.py
HomesteaderLabs's picture
Deploy Forager's Field Station
0e3ea0a verified
"""
convergence.py — Turn an infer.Pipeline result into a single ForagerResult.
Ported from forager_ml with the Hailo coupling removed: instead of a
RawPrediction it takes the plain dict returned by infer.Pipeline.identify().
The safety-first philosophy is unchanged — abstain by default, flag DEADLY
prominently, never present a below-threshold guess as an identification.
"""
from dataclasses import dataclass
from .metadata import SPECIES_METADATA, UNKNOWN_META
# --- Confidence thresholds used by the arbitration in this module ----------

# Committed results whose confidence is below this are flagged low_confidence.
LOW_CONFIDENCE_THRESHOLD = 0.50

# A committed (non-deadly) ID must clear this.
# These experts are accurate but underconfident on SAFE classes
# (avg ~0.5–0.6); 0.60 balances decisiveness (75% safe-correct) against
# residual deadly-as-safe. The UX never treats SAFE as a green light — see
# app.py — so the gate is a usability dial, not the safety mechanism.
EXPERT_CONFIDENCE_THRESHOLD = 0.60

# A DEADLY call at/above this overrides any higher-confidence SAFE/CAUTION
# call from another expert (safety-biased arbitration).
DEADLY_VETO_FLOOR = 0.40
@dataclass
class ForagerResult:
    """Single, user-facing outcome of one identification attempt.

    A result is either a committed identification (``abstained`` is False and
    ``species`` is a class key) or an abstention (``abstained`` is True,
    ``species`` is "unknown" and ``reason`` carries the abstention code).
    """

    domain: str
    species: str  # class key, or "unknown"
    scientific_name: str
    confidence: float
    safety: str  # SAFE | CAUTION | DEADLY | UNKNOWN
    lookalike: str
    key_diff: str
    low_confidence: bool
    expert_model: str
    abstained: bool
    reason: str  # why we abstained (or "" when committed)

    @property
    def is_deadly(self) -> bool:
        """Return True whenever the result's safety class is DEADLY.

        ``low_confidence`` is deliberately NOT consulted here. The deadly
        veto in ``build_result`` surfaces DEADLY calls whose confidence can
        sit below LOW_CONFIDENCE_THRESHOLD (DEADLY_VETO_FLOOR is lower), and
        such a result is committed with ``low_confidence=True``. Gating on
        that flag would silently un-flag exactly the results the veto exists
        to surface, so a DEADLY safety class alone is enough.
        """
        return self.safety == "DEADLY"

    @property
    def is_unknown(self) -> bool:
        """Return True when no species was identified (species is "unknown")."""
        return self.species == "unknown"
# User-facing explanation for each abstention code. Codes missing from this
# table fall back to UNKNOWN_META["key_diff"] inside _abstain().
_ABSTAIN_REASON = {
    "uncertain_domain":
        "Couldn't confidently place this in a known domain.",
    "off_domain":
        "This doesn't look like anything in the trained domains.",
    "low_confidence":
        "Leaning toward an answer, but not confident enough to commit.",
}
def _abstain(domain: str, reason: str, conf: float = 0.0, expert: str = "none") -> ForagerResult:
    """Build the "unknown" result returned when no identification is committed.

    Args:
        domain: Domain label to report on the result.
        reason: Abstention code. It is stored verbatim in ``reason`` and
            looked up in ``_ABSTAIN_REASON`` for the explanation placed in
            ``key_diff``; unlisted codes use ``UNKNOWN_META["key_diff"]``.
        conf: Confidence value to record on the result.
        expert: Name to record as ``expert_model``.

    Returns:
        A ForagerResult with species "unknown", safety "UNKNOWN", and both
        ``low_confidence`` and ``abstained`` set to True.
    """
    placeholder_name = UNKNOWN_META["scientific"]
    placeholder_lookalike = UNKNOWN_META["lookalike"]
    explanation = _ABSTAIN_REASON.get(reason, UNKNOWN_META["key_diff"])
    return ForagerResult(
        domain=domain,
        species="unknown",
        scientific_name=placeholder_name,
        confidence=conf,
        safety="UNKNOWN",
        lookalike=placeholder_lookalike,
        key_diff=explanation,
        low_confidence=True,
        expert_model=expert,
        abstained=True,
        reason=reason,
    )
def _commit(domain: str, c: dict) -> ForagerResult:
    """Turn one expert call into a committed (non-abstained) ForagerResult.

    Args:
        domain: Domain label to report on the result.
        c: Expert call dict; the keys "species", "confidence" and "expert"
            are read. The confidence is coerced with ``float()``.

    Returns:
        A ForagerResult populated from SPECIES_METADATA for the species
        (UNKNOWN_META when the species has no entry), with ``low_confidence``
        set when the confidence is below LOW_CONFIDENCE_THRESHOLD.
    """
    class_key = c["species"]
    info = SPECIES_METADATA.get(class_key, UNKNOWN_META)
    score = float(c["confidence"])
    return ForagerResult(
        domain=domain,
        species=class_key,
        scientific_name=info["scientific"],
        confidence=score,
        safety=info["safety"],
        lookalike=info["lookalike"],
        key_diff=info["key_diff"],
        low_confidence=score < LOW_CONFIDENCE_THRESHOLD,
        expert_model=c["expert"],
        abstained=False,
        reason="",
    )
def build_result(call: dict) -> ForagerResult:
    """Convert the dict from infer.Pipeline.identify() into a ForagerResult.

    Safety-biased arbitration across the domain's experts:

    1. If ANY expert flags a DEADLY species at >= DEADLY_VETO_FLOOR, surface
       that — a deadly vote vetoes a more-confident SAFE/CAUTION call from an
       off-domain expert (prevents e.g. hemlock -> "ramps" because highvalue
       is more confident than medicinals).
    2. Otherwise take the highest-confidence call, abstaining if it can't
       clear EXPERT_CONFIDENCE_THRESHOLD.

    Args:
        call: Pipeline result. Keys read here: "domain", "abstain", "reason",
            "confidence", "expert" and "calls" (a list of expert call dicts,
            each with "species", "confidence" and "expert").

    Returns:
        A committed ForagerResult, or an abstained one when the pipeline
        abstained without expert calls, when there are no calls, or when the
        best call does not clear EXPERT_CONFIDENCE_THRESHOLD.

    Side effects:
        Each dict in ``call["calls"]`` gets a "safety" key written onto it.
        This write-back is kept as-is because code outside this module may
        read it.
    """
    domain = call.get("domain", "unknown")

    # Upstream abstention with no expert calls attached: nothing to
    # arbitrate, so pass the pipeline's own reason/confidence through.
    if call.get("abstain") and "calls" not in call:
        return _abstain(
            domain,
            call.get("reason", "low_confidence"),
            float(call.get("confidence", 0.0)),
            call.get("expert", "none"),
        )

    calls = call.get("calls", [])
    if not calls:
        return _abstain(domain, "low_confidence")

    # Annotate every call with the safety class of the species it names.
    for c in calls:
        c["safety"] = SPECIES_METADATA.get(c["species"], UNKNOWN_META)["safety"]

    # Step 1: deadly veto. Any sufficiently confident DEADLY call wins,
    # regardless of how confident the other experts are.
    deadly = [
        c for c in calls
        if c["safety"] == "DEADLY" and c["confidence"] >= DEADLY_VETO_FLOOR
    ]
    if deadly:
        return _commit(domain, max(deadly, key=lambda c: c["confidence"]))

    # Step 2: highest-confidence call, gated by the commit threshold.
    best = max(calls, key=lambda c: c["confidence"])
    # Coerce once so the abstained result stores a plain float, matching the
    # other abstain path and _commit().
    best_conf = float(best["confidence"])
    # Written as "not >=" rather than "<" so that a NaN score (for which every
    # ordering comparison is False) abstains instead of being committed.
    if not best_conf >= EXPERT_CONFIDENCE_THRESHOLD:
        return _abstain(domain, "low_confidence", best_conf, best["expert"])
    return _commit(domain, best)