Spaces:

EphAsad
/

BactKing

Sleeping

App Files Files Community

EphAsad committed on Nov 18, 2025

Commit

bb75255

verified ·

1 Parent(s): 37d9a5e

Update engine/parser_rules.py

Browse files

Files changed (1) hide show

engine/parser_rules.py +96 -212

engine/parser_rules.py CHANGED Viewed

@@ -2,31 +2,31 @@
 # ------------------------------------------------------------
 # Rule-based core parser for microbiology descriptions.
 #
-# Stage 11F (Option A ranges + fixes) + 11H + 11I + 11J:
 #
 # - Always store Growth Temperature as "low//high"
 #   • single: 37 → "37//37"
-#   • two temps in text: min//max (e.g. "4 °C but not 45 °C" → "4//45")
-# - DNase robust parsing (DNase / DNase test, DNase activity, etc.)
-# - Non–spore-forming → Spore Formation = Negative (regex + early return)
 # - "non-H2S producing" → H2S = Negative
-# - "aerobically" / "anaerobically" → Aerobic / Anaerobic
 # - NaCl tolerance phrases improved
-# - Colony morphology from "colonies dry, white and irregular on nutrient agar"
 #
-# New in this version:
-#   • "Gelatinase positive/negative" → Gelatin Hydrolysis Positive/Negative
-#   • "<sugar> fermenter" → <Sugar> Fermentation = Positive
-#   • "<sugar> is positive/negative" handled
-#   • "<sugar> fermentation is positive/negative" handled
-#   • Grouped "does not ferment lactose and sucrose" handled cleanly
-#     (does NOT accidentally mark glucose negative when it appears after "but")
-#   • Global non-fermenter + explicit positive sugar:
-#       "Non-fermenter, ferments glucose weakly"
-#       → all sugars Negative *except* Glucose = Positive
-#   • Core tests accept "is positive/is negative/is variable"
-#   • "H2S production is positive/negative" handled
-#   • ONPG phrases like "ONPG is negative" parsed via core patterns
 # ------------------------------------------------------------
 from __future__ import annotations
@@ -34,14 +34,12 @@ from __future__ import annotations
 import re
 from typing import Dict, Any, List
 UNKNOWN = "Unknown"
 # ------------------------------------------------------------
 # Core fields and sugar mapping
 # ------------------------------------------------------------
-# Sugar name → core DB column
 SUGAR_FIELDS: Dict[str, str] = {
     "glucose": "Glucose Fermentation",
     "lactose": "Lactose Fermentation",
@@ -58,25 +56,17 @@ SUGAR_FIELDS: Dict[str, str] = {
 }
 CORE_BOOL_FIELDS: Dict[str, List[str]] = {
-    # field: [keywords to recognise the test name]
     "Catalase": ["catalase"],
     "Oxidase": ["oxidase"],
     "Indole": ["indole"],
     "Urease": ["urease"],
     "Citrate": ["citrate"],
-    # MR: include "mr"
     "Methyl Red": ["methyl red", "mr test", "mr"],
     "VP": ["voges-proskauer", "vp test", "vp"],
-    # H2S (includes H₂S → normalised to H2S in _clean_text)
     "H2S": ["h2s", "hydrogen sulfide"],
-    # DNase: broaden patterns
     "DNase": [
-        "dnase",
-        "dnase test",
-        "dnase activity",
-        "dnase production",
-        "dnaase",
-        "dna hydrolysis",
     ],
     "ONPG": ["onpg"],
     "Coagulase": ["coagulase"],
@@ -87,7 +77,6 @@ CORE_BOOL_FIELDS: Dict[str, List[str]] = {
     "Ornitihine Decarboxylase": ["ornithine decarboxylase", "ornithine decarb"],
     "Arginine dihydrolase": ["arginine dihydrolase"],
     "Gelatin Hydrolysis": ["gelatin hydrolysis", "gelatinase"],
-    # Esculin Hydrolysis: also match plain "esculin"
     "Esculin Hydrolysis": ["esculin hydrolysis", "esculin"],
 }
@@ -96,41 +85,22 @@ CORE_BOOL_FIELDS: Dict[str, List[str]] = {
 # ------------------------------------------------------------
 def _clean_text(text: str) -> str:
-    """
-    Normalise unicode oddities and collapse whitespace.
-    Also:
-      - strip degree symbols
-      - normalise subscript ₂ → 2 for H₂S
-    """
     if not text:
         return ""
     s = text.replace("°", "").replace("º", "")
-    # normalise subscript 2 (H₂S → H2S)
     s = s.replace("₂", "2")
-    # collapse whitespace
     return " ".join(s.split())
 def _norm(s: str) -> str:
     return s.strip().lower()
 def _set_if_stronger(parsed: Dict[str, str], field: str, value: str) -> None:
-    """
-    Write value to parsed[field] if:
-      - field not present, or
-      - we are replacing Unknown with a concrete value
-    """
     if not value:
         return
     if field not in parsed or parsed[field] == UNKNOWN:
         parsed[field] = value
 def _value_from_pnv_token(token: str) -> str | None:
-    """
-    Map a simple token to Positive / Negative / Variable.
-    """
     seg = _norm(token)
     if seg in ["positive", "pos", "+"]:
         return "Positive"
@@ -140,28 +110,15 @@ def _value_from_pnv_token(token: str) -> str | None:
         return "Variable"
     return None
 def _value_from_pnv_context(segment: str) -> str | None:
-    """
-    Interpret a phrase as Positive / Negative / Variable.
-    Handles:
-      - "positive"
-      - "is positive"
-      - "+", "neg", etc.
-    """
     seg = _norm(segment)
-    # direct token first
     val = _value_from_pnv_token(seg)
     if val:
         return val
-    # "... is positive"
     m = re.search(r"\bis\s+(positive|negative|variable|pos|neg|\+|\-)\b", seg)
     if m:
         return _value_from_pnv_token(m.group(1))
     return None
 # ------------------------------------------------------------
 # Gram stain and shape
 # ------------------------------------------------------------
@@ -176,21 +133,17 @@ def _parse_gram_and_shape(text_lc: str, parsed: Dict[str, str]) -> None:
         _set_if_stronger(parsed, "Gram Stain", "Variable")
     # Shape
-    # Prefer "short rods" over generic rods
     if "short rods" in text_lc:
         _set_if_stronger(parsed, "Shape", "Short Rods")
-    # Cocci and variants (diplococci, tetracocci, etc.)
     if re.search(r"\bcocci\b", text_lc):
         _set_if_stronger(parsed, "Shape", "Cocci")
     if re.search(r"\b(diplococci|tetracocci|streptococci|staphylococci)\b", text_lc):
         _set_if_stronger(parsed, "Shape", "Cocci")
-    # Rods / bacilli
     if re.search(r"\brods?\b", text_lc) or "bacilli" in text_lc:
         _set_if_stronger(parsed, "Shape", "Rods")
-    # Spiral
     if "spiral" in text_lc or "spirochete" in text_lc:
         _set_if_stronger(parsed, "Shape", "Spiral")
@@ -200,11 +153,6 @@ def _parse_gram_and_shape(text_lc: str, parsed: Dict[str, str]) -> None:
 # ------------------------------------------------------------
 def _parse_haemolysis(text_lc: str, parsed: Dict[str, str]) -> None:
-    """
-    Handle haemolysis phrasing:
-      - beta-haemolytic / beta hemolytic / beta-haemolysis / etc.
-      - alpha- / gamma- / non-haemolytic
-    """
     # Beta
     if re.search(r"beta[- ]?(haemolytic|hemolytic|haemolysis|hemolysis)", text_lc):
         _set_if_stronger(parsed, "Haemolysis Type", "Beta")
@@ -219,6 +167,7 @@ def _parse_haemolysis(text_lc: str, parsed: Dict[str, str]) -> None:
     if re.search(r"gamma[- ]?(haemolytic|hemolytic|haemolysis|hemolysis)", text_lc):
         _set_if_stronger(parsed, "Haemolysis Type", "Gamma")
         _set_if_stronger(parsed, "Haemolysis", "Negative")
     if (
         "non-haemolytic" in text_lc
         or "non hemolytic" in text_lc
@@ -243,16 +192,19 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
       - "catalase positive"
       - "positive for catalase"
       - "catalase is positive"
-    Also handles:
       - NaCl tolerance with % values
       - Nitrate reduction text
       - H2S production / non-production
-      - DNase universal coverage
-      - explicit gelatinase → Gelatin Hydrolysis mapping
     """
     for field, keywords in CORE_BOOL_FIELDS.items():
         for kw in keywords:
-            # "... catalase positive"
             m1 = re.search(
                 rf"{re.escape(kw)}[ \-]?"
                 r"(positive|negative|variable|pos|neg|\+|\-)",
@@ -264,7 +216,7 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
                     _set_if_stronger(parsed, field, val)
                     break
-            # "positive for catalase"
             m2 = re.search(
                 rf"(positive|negative|variable|pos|neg|\+|\-)\s+"
                 rf"(for\s+)?{re.escape(kw)}",
@@ -276,7 +228,7 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
                     _set_if_stronger(parsed, field, val)
                     break
-            # "<kw> is positive"
             m3 = re.search(
                 rf"{re.escape(kw)}\s+is\s+"
                 r"(positive|negative|variable|pos|neg|\+|\-)",
@@ -288,9 +240,44 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
                     _set_if_stronger(parsed, field, val)
                     break
         # Special-case NaCl tolerance with explicit percentages
         if field == "NaCl Tolerant (>=6%)":
-            # e.g. "grows in 6.5% NaCl", "grows at 10% NaCl"
             for m in re.finditer(
                 r"(grows|growth)\s+(in|at)\s*(\d+(?:\.\d+)?)\s*%?\s*nacl",
                 text_lc,
@@ -302,7 +289,6 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
                 except Exception:
                     pass
-            # e.g. "NaCl tolerant up to 10%"
             for m in re.finditer(
                 r"nacl\s+tolerant\s+(?:to|up to)?\s*(\d+(?:\.\d+)?)\s*%?",
                 text_lc,
@@ -314,14 +300,12 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
                 except Exception:
                     pass
-            # explicit negative phrasing: "does not grow in 7% NaCl"
             if re.search(
                 r"does (not|n't) grow\s+(in|at)\s*(\d+(?:\.\d+)?)\s*%?\s*nacl",
                 text_lc,
             ):
                 _set_if_stronger(parsed, "NaCl Tolerant (>=6%)", "Negative")
-            # general "in 6.5% NaCl" → assume tolerance if no explicit "no growth"
             for m in re.finditer(
                 r"\bin\s*(\d+(?:\.\d+)?)\s*%?\s*nacl",
                 text_lc,
@@ -333,13 +317,13 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
                 except Exception:
                     pass
-    # Nitrate: "reduces nitrate" / "does not reduce nitrate"
     if re.search(r"reduces nitrate", text_lc):
         _set_if_stronger(parsed, "Nitrate Reduction", "Positive")
     if re.search(r"does (not|n't) reduce nitrate", text_lc):
         _set_if_stronger(parsed, "Nitrate Reduction", "Negative")
-    # H2S: "produces H2S", "H2S production", "H2S production is positive"
     if re.search(r"(produces|production of)\s+h2s", text_lc):
         _set_if_stronger(parsed, "H2S", "Positive")
     if re.search(r"h2s production\s+is\s+(positive|pos|\+)", text_lc):
@@ -353,29 +337,21 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
     ):
         _set_if_stronger(parsed, "H2S", "Negative")
-    # --- DNase universal coverage ---
-    # Positive forms
     if re.search(r"\bdnase(\s+test|\s+activity|\s+production)?\s*(positive|pos|\+)\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Positive")
     if re.search(r"\b(positive|pos|\+)\s+dnase(\s+test|\s+activity|\s+production)?\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Positive")
-    # Negative forms
     if re.search(r"\bdnase(\s+test|\s+activity|\s+production)?\s*(negative|neg|\-)\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Negative")
     if re.search(r"\b(negative|neg|\-)\s+dnase(\s+test|\s+activity|\s+production)?\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Negative")
-    # non-DNase-producing
     if re.search(r"\bnon[- ]?dnase[- ]?producing\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Negative")
-    # --- NEW: Gelatinase → Gelatin Hydrolysis ---
     if re.search(r"\bgelatinase\s*(positive|pos|\+)\b", text_lc):
         _set_if_stronger(parsed, "Gelatin Hydrolysis", "Positive")
     if re.search(r"\bgelatinase\s*(negative|neg|\-)\b", text_lc):
         _set_if_stronger(parsed, "Gelatin Hydrolysis", "Negative")
@@ -402,7 +378,6 @@ def _parse_motility_capsule_spores(text_lc: str, parsed: Dict[str, str]) -> None
     ):
         _set_if_stronger(parsed, "Motility", "Negative")
-    # Specific motility phrases: tumbling, swarming, corkscrew
     if (
         "tumbling motility" in text_lc
         or "swarming motility" in text_lc
@@ -411,7 +386,7 @@ def _parse_motility_capsule_spores(text_lc: str, parsed: Dict[str, str]) -> None
     ):
         _set_if_stronger(parsed, "Motility", "Positive")
-    # Capsule (including "capsule positive/negative")
     if (
         "capsulated" in text_lc
         or "encapsulated" in text_lc
@@ -428,15 +403,13 @@ def _parse_motility_capsule_spores(text_lc: str, parsed: Dict[str, str]) -> None
         _set_if_stronger(parsed, "Capsule", "Negative")
     # Spore formation
-    # NEGATIVE FIRST with strict boundaries, then early-return
     if (
         re.search(r"\bnon[-\s]?spore[-\s]?forming\b", text_lc)
         or "no spores" in text_lc
     ):
         _set_if_stronger(parsed, "Spore Formation", "Negative")
-        return  # prevent any positive overwrite
-    # POSITIVE (must not match the negative form)
     if (
         re.search(r"\bspore[-\s]?forming\b", text_lc)
         or "forms spores" in text_lc
@@ -449,17 +422,9 @@ def _parse_motility_capsule_spores(text_lc: str, parsed: Dict[str, str]) -> None
 # ------------------------------------------------------------
 def _parse_oxygen(text_lc: str, parsed: Dict[str, str]) -> None:
-    """
-    Robust oxygen parsing:
-      - Handle facultative first
-      - Avoid "aerobic" accidentally matching inside "anaerobic"
-      - Include "aerobically" / "anaerobically"
-    """
-    # Facultative first
     if re.search(r"facultative(ly)? anaerob", text_lc):
         _set_if_stronger(parsed, "Oxygen Requirement", "Facultative Anaerobe")
-    # Strict anaerobic (before aerobic)
     if (
         re.search(r"\bobligate anaerob", text_lc)
         or (re.search(r"\banaerobic\b", text_lc) and "facultative" not in text_lc)
@@ -467,14 +432,10 @@ def _parse_oxygen(text_lc: str, parsed: Dict[str, str]) -> None:
     ):
         _set_if_stronger(parsed, "Oxygen Requirement", "Anaerobic")
-    # Now handle purely aerobic, avoiding "anaerobic"
     if (
         re.search(r"\bobligate aerobe\b", text_lc)
         or (re.search(r"\baerobic\b", text_lc) and "anaerobic" not in text_lc)
-        or (
-            re.search(r"\baerobically\b", text_lc)
-            and "anaerobically" not in text_lc
-        )
     ):
         _set_if_stronger(parsed, "Oxygen Requirement", "Aerobic")
@@ -490,14 +451,6 @@ def _parse_oxygen(text_lc: str, parsed: Dict[str, str]) -> None:
 # ------------------------------------------------------------
 def _parse_growth_temperature(text_lc: str, parsed: Dict[str, str]) -> None:
-    """
-    Look for explicit temperatures like "grows at 37 °C" or ranges like "4–45 °C".
-    We ALWAYS store as "low//high":
-      - true ranges: "4-45 °C" → "4//45"
-      - two temps in text: min//max (Option A)
-      - single temps: "37 °C" → "37//37"
-    """
-    # 1) Explicit ranges like "4-45 °C" or "10–40 °C"
     range_pattern = re.compile(
         r"(\d+)\s*[-–/]\s*(\d+)\s*(?:c|°c|degrees c|degrees celsius)"
     )
@@ -508,7 +461,6 @@ def _parse_growth_temperature(text_lc: str, parsed: Dict[str, str]) -> None:
         _set_if_stronger(parsed, "Growth Temperature", f"{low}//{high}")
         return
-    # 2) Option A: any two explicit temps → min//max
     temps = re.findall(r"(\d+)\s*(?:c|°c|degrees c|degrees celsius)", text_lc)
     if len(temps) >= 2:
         nums = [int(t) for t in temps]
@@ -517,7 +469,6 @@ def _parse_growth_temperature(text_lc: str, parsed: Dict[str, str]) -> None:
         _set_if_stronger(parsed, "Growth Temperature", f"{low}//{high}")
         return
-    # 3) Single temps like "grows at 37 c"
     single_pattern = re.compile(
         r"(grows|growth|optimum|optimal)\s+(?:at\s+)?(\d+)\s*"
         r"(?:c|°c|degrees c|degrees celsius)"
@@ -528,14 +479,12 @@ def _parse_growth_temperature(text_lc: str, parsed: Dict[str, str]) -> None:
         _set_if_stronger(parsed, "Growth Temperature", f"{temp}//{temp}")
         return
-    # 4) Simplified: "grows at 37" (no explicit °C)
     m_simple_num = re.search(r"grows at (\d+)\b", text_lc)
     if m_simple_num:
         temp = m_simple_num.group(1)
         _set_if_stronger(parsed, "Growth Temperature", f"{temp}//{temp}")
         return
-    # 5) Fallback: plain "37c" somewhere in the text
     m_plain = re.search(
         r"\b(\d+)\s*(?:c|°c|degrees c|degrees celsius)\b",
         text_lc,
@@ -546,48 +495,21 @@ def _parse_growth_temperature(text_lc: str, parsed: Dict[str, str]) -> None:
 # ------------------------------------------------------------
-# Media grown on (coarse mapping)
 # ------------------------------------------------------------
 MEDIA_KEYWORDS = {
-    "Blood Agar": [
-        "blood agar",
-        "blood-agar",
-    ],
-    "MacConkey Agar": [
-        "macconkey agar",
-        "mac conkey agar",
-        "macconkey",
-    ],
-    "Chocolate Agar": [
-        "chocolate agar",
-        "chocolate-agar",
-    ],
-    "Nutrient Agar": [
-        "nutrient agar",
-        "nutrient-agar",
-    ],
-    "XLD Agar": [
-        "xld agar",
-    ],
-    "TCBS Agar": [
-        "tcbs agar",
-        "tcbs",
-    ],
-    "ALOA": [
-        "aloa agar",
-        "aloa",
-    ],
-    "BCYE Agar": [
-        "bcye agar",
-        "bcye",
-    ],
-    "MRS Agar": [
-        "mrs agar",
-    ],
 }
 def _parse_media(text_lc: str, parsed: Dict[str, str]) -> None:
     found_media: List[str] = []
     for media_name, patterns in MEDIA_KEYWORDS.items():
@@ -600,34 +522,21 @@ def _parse_media(text_lc: str, parsed: Dict[str, str]) -> None:
 # ------------------------------------------------------------
-# Sugar fermentation parsing
 # ------------------------------------------------------------
 def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
-    """
-    Handles patterns like:
-      - "glucose positive, mannitol negative"
-      - "ferments glucose, mannitol and sucrose but not lactose"
-      - "does not ferment lactose or sucrose"
-      - "non-lactose fermenter"
-      - "<sugar> fermenter" (positive unless "non-<sugar> fermenter")
-      - "<sugar> is positive/negative"
-      - "<sugar> fermentation is positive/negative"
-      - global non-fermenter phrases
-    """
-    # 0) Simple "glucose positive / negative" style + "<sugar> is positive"
     for sugar_key, field in SUGAR_FIELDS.items():
-        # "glucose positive"
         m_simple = re.search(
-            rf"{sugar_key}\s+(positive|negative|variable|pos|neg|\+|\-)", text_lc
         )
         if m_simple:
             val = _value_from_pnv_context(m_simple.group(1))
             if val:
                 _set_if_stronger(parsed, field, val)
-        # "<sugar> is positive"
         m_is = re.search(
             rf"{sugar_key}\s+is\s+(positive|negative|variable|pos|neg|\+|\-)",
             text_lc,
@@ -637,15 +546,12 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
             if val:
                 _set_if_stronger(parsed, field, val)
-    # 0b) "<sugar> fermenter" → Positive; "non-<sugar> fermenter" → Negative
     for sugar_key, field in SUGAR_FIELDS.items():
-        # positive: "lactose fermenter"
         if re.search(rf"\b{sugar_key}\s+fermenter\b", text_lc) and not re.search(
             rf"\bnon[- ]{sugar_key}\s+fermenter\b", text_lc
         ):
             _set_if_stronger(parsed, field, "Positive")
-        # negative: "non-lactose fermenter"
         if re.search(rf"\bnon[- ]{sugar_key}\s+fermenter\b", text_lc):
             _set_if_stronger(parsed, field, "Negative")
@@ -653,24 +559,19 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
     ferments_pattern = re.compile(r"ferments\s+([a-z0-9 ,;/&\-]+)")
     for m in ferments_pattern.finditer(text_lc):
         seg = m.group(1)
-        # Split positive vs negative part on "but not"
         neg_split = re.split(r"\bbut not\b", seg, maxsplit=1)
         pos_part = neg_split[0]
         neg_part = neg_split[1] if len(neg_split) > 1 else ""
-        # Positive sugars from pos_part
         for sugar_key, field in SUGAR_FIELDS.items():
             if re.search(rf"\b{sugar_key}\b", pos_part):
                 _set_if_stronger(parsed, field, "Positive")
-        # Negative sugars from neg_part
         for sugar_key, field in SUGAR_FIELDS.items():
             if re.search(rf"\b{sugar_key}\b", neg_part):
                 _set_if_stronger(parsed, field, "Negative")
-    # 2) Grouped "does not ferment X, Y and Z" — but **stop at "but" / punctuation**
-    #    This prevents glucose being accidentally marked negative in:
-    #      "does not ferment lactose or sucrose, but glucose fermentation is positive"
     grouped_neg_pattern = re.compile(
         r"does\s+(?:not|n't)\s+ferment\s+([a-z0-9 ,;/&\-]+?)(?:\s+but\b|\.|;|,|$)"
     )
@@ -680,23 +581,22 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
             if re.search(rf"\b{sugar_key}\b", seg):
                 _set_if_stronger(parsed, field, "Negative")
-    # 3) "does not ferment X" (single sugar)
     for sugar_key, field in SUGAR_FIELDS.items():
         if re.search(
             rf"does\s+(?:not|n't)\s+ferment\s+{sugar_key}\b", text_lc
         ):
             _set_if_stronger(parsed, field, "Negative")
-    # 4) "non-lactose fermenter" covered above (+ keep "non-lactose fermenting")
     for sugar_key, field in SUGAR_FIELDS.items():
         if re.search(
             rf"non[- ]{sugar_key}\s+ferment(ing|er)?", text_lc
         ):
             _set_if_stronger(parsed, field, "Negative")
-    # 5) "X fermentation positive/negative" + "is positive"
     for sugar_key, field in SUGAR_FIELDS.items():
-        # "glucose fermentation positive"
         m1 = re.search(
             rf"{sugar_key}\s+fermentation[ \-]?"
             r"(positive|negative|variable|pos|neg|\+|\-)",
@@ -708,7 +608,6 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
                 _set_if_stronger(parsed, field, val)
                 continue
-        # "positive for glucose fermentation"
         m2 = re.search(
             rf"(positive|negative|variable|pos|neg|\+|\-)\s+"
             rf"(for\s+)?{sugar_key}\s+fermentation",
@@ -720,7 +619,6 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
                 _set_if_stronger(parsed, field, val)
                 continue
-        # NEW: "<sugar> fermentation is positive/negative"
         m3 = re.search(
             rf"{sugar_key}\s+fermentation\s+is\s+"
             r"(positive|negative|variable|pos|neg|\+|\-)",
@@ -732,9 +630,7 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
                 _set_if_stronger(parsed, field, val)
                 continue
-    # 6) Global non-fermenter patterns
-    #     e.g. "non-fermenter", "does not ferment sugars"
-    #     → set all sugars Negative *unless* already set by a more specific rule.
     if (
         re.search(
             r"does\s+(?:not|n't)\s+ferment\s+(carbohydrates|sugars)", text_lc
@@ -744,20 +640,11 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
         for field in SUGAR_FIELDS.values():
             if field not in parsed or parsed[field] == UNKNOWN:
                 _set_if_stronger(parsed, field, "Negative")
 # ------------------------------------------------------------
-# Colony morphology (coarse, optional)
 # ------------------------------------------------------------
 def _parse_colony(text_lc: str, parsed: Dict[str, str]) -> None:
-    """
-    Very coarse mapping for colony morphology. We try:
-      - "colonies are yellow, mucoid"
-      - "colonies dry, white and irregular on nutrient agar"
-      - "forming green colonies", "forms mucoid colonies"
-    """
-    # Pattern 1: "colonies are ..."
     m = re.search(r"colon(y|ies)\s+(are|is)\s+([a-z0-9 ,;\-]+)", text_lc)
     if m:
         desc = m.group(3).strip()
@@ -769,7 +656,6 @@ def _parse_colony(text_lc: str, parsed: Dict[str, str]) -> None:
                 _set_if_stronger(parsed, "Colony Morphology", pretty)
                 return
-    # Pattern 2: "colonies dry, white and irregular on nutrient agar"
     m2 = re.search(
         r"colonies\s+([a-z0-9 ,;\-]+?)(?:\s+on\b|\.|,)",
         text_lc,
@@ -784,7 +670,6 @@ def _parse_colony(text_lc: str, parsed: Dict[str, str]) -> None:
                 _set_if_stronger(parsed, "Colony Morphology", pretty)
                 return
-    # Pattern 3: "forming green colonies", "forms mucoid colonies"
     m3 = re.search(
         r"(forming|forms|produces)\s+([a-z0-9 ,;\-]+?)\s+colonies",
         text_lc,
@@ -831,10 +716,9 @@ def parse_text_rules(text: str) -> Dict[str, Any]:
         }
     except Exception as e:
-        # Fail-safe: never crash the app, just report an error
         return {
             "parsed_fields": parsed,
             "source": "rule_parser",
             "raw": original,
             "error": f"{type(e).__name__}: {e}",
-        }

 # ------------------------------------------------------------
 # Rule-based core parser for microbiology descriptions.
 #
+# Stage 11F (Option A ranges + fixes) + 11H + 11I + 11J + 11K:
 #
 # - Always store Growth Temperature as "low//high"
 #   • single: 37 → "37//37"
+#   • two temps in text: min//max
+# - DNase robust parsing (DNase test / activity / production)
+# - Non–spore-forming → Spore Formation = Negative
 # - "non-H2S producing" → H2S = Negative
+# - Aerobic / Anaerobic including aerobically/anaerobically
 # - NaCl tolerance phrases improved
+# - Colony morphology extraction
 #
+# New additions:
+#   • "Gelatinase positive/negative" → Gelatin Hydrolysis
+#   • "<sugar> fermenter" → <Sugar> Fermentation
+#   • "<sugar> is positive/negative"
+#   • "<sugar> fermentation is positive/negative"
+#   • Grouped negative sugars, avoiding false glucose-negative
+#   • Global non-fermenter + explicit positive sugar handled
+#   • Core tests accept "is positive"
+#   • H2S production "is positive/negative"
+#   • "ONPG is negative" captured
+#   • NEW (11K): "<kw> reaction is positive/negative"
+#                "<kw> reaction positive/negative"
+#                "<kw> test reaction is positive"
 # ------------------------------------------------------------
 from __future__ import annotations
 import re
 from typing import Dict, Any, List
 UNKNOWN = "Unknown"
 # ------------------------------------------------------------
 # Core fields and sugar mapping
 # ------------------------------------------------------------
 SUGAR_FIELDS: Dict[str, str] = {
     "glucose": "Glucose Fermentation",
     "lactose": "Lactose Fermentation",
 }
 CORE_BOOL_FIELDS: Dict[str, List[str]] = {
     "Catalase": ["catalase"],
     "Oxidase": ["oxidase"],
     "Indole": ["indole"],
     "Urease": ["urease"],
     "Citrate": ["citrate"],
     "Methyl Red": ["methyl red", "mr test", "mr"],
     "VP": ["voges-proskauer", "vp test", "vp"],
     "H2S": ["h2s", "hydrogen sulfide"],
     "DNase": [
+        "dnase", "dnase test", "dnase activity",
+        "dnase production", "dnaase", "dna hydrolysis"
     ],
     "ONPG": ["onpg"],
     "Coagulase": ["coagulase"],
     "Ornitihine Decarboxylase": ["ornithine decarboxylase", "ornithine decarb"],
     "Arginine dihydrolase": ["arginine dihydrolase"],
     "Gelatin Hydrolysis": ["gelatin hydrolysis", "gelatinase"],
     "Esculin Hydrolysis": ["esculin hydrolysis", "esculin"],
 }
 # ------------------------------------------------------------
 def _clean_text(text: str) -> str:
     if not text:
         return ""
     s = text.replace("°", "").replace("º", "")
     s = s.replace("₂", "2")
     return " ".join(s.split())
 def _norm(s: str) -> str:
     return s.strip().lower()
 def _set_if_stronger(parsed: Dict[str, str], field: str, value: str) -> None:
     if not value:
         return
     if field not in parsed or parsed[field] == UNKNOWN:
         parsed[field] = value
 def _value_from_pnv_token(token: str) -> str | None:
     seg = _norm(token)
     if seg in ["positive", "pos", "+"]:
         return "Positive"
         return "Variable"
     return None
 def _value_from_pnv_context(segment: str) -> str | None:
     """
     Interpret a phrase as a Positive / Negative / Variable result.

     The normalised phrase is first tried as a bare token via
     `_value_from_pnv_token`. If that yields nothing, an "is <token>"
     construction (e.g. "is positive", "is neg", "is +") is searched for
     and its token is mapped the same way.

     Returns whatever `_value_from_pnv_token` gives for the token, or None
     when no recognisable result is present.
     """
     seg = _norm(segment)
     direct = _value_from_pnv_token(seg)
     if direct:
         return direct
     # Word tokens keep a trailing word boundary so e.g. "is posh" is not read
     # as "pos". The symbols must not carry one: `\b` after "+" / "-" only
     # matches when a word character follows, which made "is +" and "is -"
     # unmatchable at the end of a phrase or before a space or punctuation.
     found = re.search(
         r"\bis\s+(?:(positive|negative|variable|pos|neg)\b|([+\-]))", seg
     )
     if found:
         return _value_from_pnv_token(found.group(1) or found.group(2))
     return None
 # ------------------------------------------------------------
 # Gram stain and shape
 # ------------------------------------------------------------
         _set_if_stronger(parsed, "Gram Stain", "Variable")
     # Shape
     if "short rods" in text_lc:
         _set_if_stronger(parsed, "Shape", "Short Rods")
     if re.search(r"\bcocci\b", text_lc):
         _set_if_stronger(parsed, "Shape", "Cocci")
     if re.search(r"\b(diplococci|tetracocci|streptococci|staphylococci)\b", text_lc):
         _set_if_stronger(parsed, "Shape", "Cocci")
     if re.search(r"\brods?\b", text_lc) or "bacilli" in text_lc:
         _set_if_stronger(parsed, "Shape", "Rods")
     if "spiral" in text_lc or "spirochete" in text_lc:
         _set_if_stronger(parsed, "Shape", "Spiral")
 # ------------------------------------------------------------
 def _parse_haemolysis(text_lc: str, parsed: Dict[str, str]) -> None:
     # Beta
     if re.search(r"beta[- ]?(haemolytic|hemolytic|haemolysis|hemolysis)", text_lc):
         _set_if_stronger(parsed, "Haemolysis Type", "Beta")
     if re.search(r"gamma[- ]?(haemolytic|hemolytic|haemolysis|hemolysis)", text_lc):
         _set_if_stronger(parsed, "Haemolysis Type", "Gamma")
         _set_if_stronger(parsed, "Haemolysis", "Negative")
     if (
         "non-haemolytic" in text_lc
         or "non hemolytic" in text_lc
       - "catalase positive"
       - "positive for catalase"
       - "catalase is positive"
+      - "indole reaction is negative"
+      - "indole reaction negative"
+      - "indole test reaction is positive"
+    Plus:
       - NaCl tolerance with % values
       - Nitrate reduction text
       - H2S production / non-production
+      - DNase coverage
+      - gelatinase → Gelatin Hydrolysis
     """
     for field, keywords in CORE_BOOL_FIELDS.items():
         for kw in keywords:
+            # 1) "... catalase positive"
             m1 = re.search(
                 rf"{re.escape(kw)}[ \-]?"
                 r"(positive|negative|variable|pos|neg|\+|\-)",
                     _set_if_stronger(parsed, field, val)
                     break
+            # 2) "positive for catalase"
             m2 = re.search(
                 rf"(positive|negative|variable|pos|neg|\+|\-)\s+"
                 rf"(for\s+)?{re.escape(kw)}",
                     _set_if_stronger(parsed, field, val)
                     break
+            # 3) "<kw> is positive"
             m3 = re.search(
                 rf"{re.escape(kw)}\s+is\s+"
                 r"(positive|negative|variable|pos|neg|\+|\-)",
                     _set_if_stronger(parsed, field, val)
                     break
+            # 4) NEW: "<kw> reaction is positive/negative"
+            m4 = re.search(
+                rf"{re.escape(kw)}\s+reaction\s+is\s+"
+                r"(positive|negative|variable|pos|neg|\+|\-)",
+                text_lc,
+            )
+            if m4:
+                val = _value_from_pnv_token(m4.group(1))
+                if val:
+                    _set_if_stronger(parsed, field, val)
+                    break
+            # 5) NEW: "<kw> reaction positive/negative"
+            m5 = re.search(
+                rf"{re.escape(kw)}\s+reaction\s+"
+                r"(positive|negative|variable|pos|neg|\+|\-)",
+                text_lc,
+            )
+            if m5:
+                val = _value_from_pnv_token(m5.group(1))
+                if val:
+                    _set_if_stronger(parsed, field, val)
+                    break
+            # 6) NEW: "<kw> test reaction is positive"
+            m6 = re.search(
+                rf"{re.escape(kw)}\s+test\s+reaction\s+is\s+"
+                r"(positive|negative|variable|pos|neg|\+|\-)",
+                text_lc,
+            )
+            if m6:
+                val = _value_from_pnv_token(m6.group(1))
+                if val:
+                    _set_if_stronger(parsed, field, val)
+                    break
         # Special-case NaCl tolerance with explicit percentages
         if field == "NaCl Tolerant (>=6%)":
             for m in re.finditer(
                 r"(grows|growth)\s+(in|at)\s*(\d+(?:\.\d+)?)\s*%?\s*nacl",
                 text_lc,
                 except Exception:
                     pass
             for m in re.finditer(
                 r"nacl\s+tolerant\s+(?:to|up to)?\s*(\d+(?:\.\d+)?)\s*%?",
                 text_lc,
                 except Exception:
                     pass
             if re.search(
                 r"does (not|n't) grow\s+(in|at)\s*(\d+(?:\.\d+)?)\s*%?\s*nacl",
                 text_lc,
             ):
                 _set_if_stronger(parsed, "NaCl Tolerant (>=6%)", "Negative")
             for m in re.finditer(
                 r"\bin\s*(\d+(?:\.\d+)?)\s*%?\s*nacl",
                 text_lc,
                 except Exception:
                     pass
+    # Nitrate
     if re.search(r"reduces nitrate", text_lc):
         _set_if_stronger(parsed, "Nitrate Reduction", "Positive")
     if re.search(r"does (not|n't) reduce nitrate", text_lc):
         _set_if_stronger(parsed, "Nitrate Reduction", "Negative")
+    # H2S
     if re.search(r"(produces|production of)\s+h2s", text_lc):
         _set_if_stronger(parsed, "H2S", "Positive")
     if re.search(r"h2s production\s+is\s+(positive|pos|\+)", text_lc):
     ):
         _set_if_stronger(parsed, "H2S", "Negative")
+    # DNase
     if re.search(r"\bdnase(\s+test|\s+activity|\s+production)?\s*(positive|pos|\+)\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Positive")
     if re.search(r"\b(positive|pos|\+)\s+dnase(\s+test|\s+activity|\s+production)?\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Positive")
     if re.search(r"\bdnase(\s+test|\s+activity|\s+production)?\s*(negative|neg|\-)\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Negative")
     if re.search(r"\b(negative|neg|\-)\s+dnase(\s+test|\s+activity|\s+production)?\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Negative")
     if re.search(r"\bnon[- ]?dnase[- ]?producing\b", text_lc):
         _set_if_stronger(parsed, "DNase", "Negative")
+    # Gelatinase → Gelatin Hydrolysis
     if re.search(r"\bgelatinase\s*(positive|pos|\+)\b", text_lc):
         _set_if_stronger(parsed, "Gelatin Hydrolysis", "Positive")
     if re.search(r"\bgelatinase\s*(negative|neg|\-)\b", text_lc):
         _set_if_stronger(parsed, "Gelatin Hydrolysis", "Negative")
     ):
         _set_if_stronger(parsed, "Motility", "Negative")
     if (
         "tumbling motility" in text_lc
         or "swarming motility" in text_lc
     ):
         _set_if_stronger(parsed, "Motility", "Positive")
+    # Capsule
     if (
         "capsulated" in text_lc
         or "encapsulated" in text_lc
         _set_if_stronger(parsed, "Capsule", "Negative")
     # Spore formation
     if (
         re.search(r"\bnon[-\s]?spore[-\s]?forming\b", text_lc)
         or "no spores" in text_lc
     ):
         _set_if_stronger(parsed, "Spore Formation", "Negative")
+        return
     if (
         re.search(r"\bspore[-\s]?forming\b", text_lc)
         or "forms spores" in text_lc
 # ------------------------------------------------------------
 def _parse_oxygen(text_lc: str, parsed: Dict[str, str]) -> None:
     """Record the oxygen requirement hinted at by the description.

     Three independent rules are checked in a fixed order (facultative
     anaerobe, anaerobic, aerobic). Every rule that matches reports its
     value for "Oxygen Requirement" through ``_set_if_stronger``, so more
     than one call may happen for a single text.

     Args:
         text_lc: Description text; the patterns are all lower-case, so the
             caller is expected to pass lower-cased text.
         parsed: Field mapping handed to ``_set_if_stronger``.
     """
     field = "Oxygen Requirement"

     def mentions(pattern: str) -> bool:
         # Thin wrapper so each rule below reads as a plain boolean.
         return re.search(pattern, text_lc) is not None

     # Rule 1: "facultative anaerobe" / "facultatively anaerobic".
     if mentions(r"facultative(ly)? anaerob"):
         _set_if_stronger(parsed, field, "Facultative Anaerobe")

     # Rule 2: explicit obligate anaerobe, or a bare "anaerobic" in a text
     # that never says "facultative".
     bare_anaerobic = mentions(r"\banaerobic\b") and "facultative" not in text_lc
     if mentions(r"\bobligate anaerob") or bare_anaerobic:
         _set_if_stronger(parsed, field, "Anaerobic")

     # Rule 3: explicit obligate aerobe, or "aerobic" / "aerobically" in a
     # text that does not also contain the matching "an-" form.
     bare_aerobic = mentions(r"\baerobic\b") and "anaerobic" not in text_lc
     bare_aerobically = (
         mentions(r"\baerobically\b") and "anaerobically" not in text_lc
     )
     if mentions(r"\bobligate aerobe\b") or bare_aerobic or bare_aerobically:
         _set_if_stronger(parsed, field, "Aerobic")
 # ------------------------------------------------------------
 def _parse_growth_temperature(text_lc: str, parsed: Dict[str, str]) -> None:
     range_pattern = re.compile(
         r"(\d+)\s*[-–/]\s*(\d+)\s*(?:c|°c|degrees c|degrees celsius)"
     )
         _set_if_stronger(parsed, "Growth Temperature", f"{low}//{high}")
         return
     temps = re.findall(r"(\d+)\s*(?:c|°c|degrees c|degrees celsius)", text_lc)
     if len(temps) >= 2:
         nums = [int(t) for t in temps]
         _set_if_stronger(parsed, "Growth Temperature", f"{low}//{high}")
         return
     single_pattern = re.compile(
         r"(grows|growth|optimum|optimal)\s+(?:at\s+)?(\d+)\s*"
         r"(?:c|°c|degrees c|degrees celsius)"
         _set_if_stronger(parsed, "Growth Temperature", f"{temp}//{temp}")
         return
     m_simple_num = re.search(r"grows at (\d+)\b", text_lc)
     if m_simple_num:
         temp = m_simple_num.group(1)
         _set_if_stronger(parsed, "Growth Temperature", f"{temp}//{temp}")
         return
     m_plain = re.search(
         r"\b(\d+)\s*(?:c|°c|degrees c|degrees celsius)\b",
         text_lc,
 # ------------------------------------------------------------
+# Media grown on
 # ------------------------------------------------------------
 # Lookup table for growth media: each key is the display name of a medium
 # and each value lists lower-case keyword variants for it (spaced and
 # hyphenated spellings, plus bare abbreviations such as "tcbs" or "aloa").
 # _parse_media iterates the entries as (media_name, patterns); presumably
 # each pattern is searched for in the lower-cased description - TODO confirm.
 MEDIA_KEYWORDS = {
+    "Blood Agar": ["blood agar", "blood-agar"],
+    "MacConkey Agar": ["macconkey agar", "mac conkey agar", "macconkey"],
+    "Chocolate Agar": ["chocolate agar", "chocolate-agar"],
+    "Nutrient Agar": ["nutrient agar", "nutrient-agar"],
+    "XLD Agar": ["xld agar"],
+    "TCBS Agar": ["tcbs agar", "tcbs"],
+    "ALOA": ["aloa agar", "aloa"],
+    "BCYE Agar": ["bcye agar", "bcye"],
+    "MRS Agar": ["mrs agar"],
 }
 def _parse_media(text_lc: str, parsed: Dict[str, str]) -> None:
     found_media: List[str] = []
     for media_name, patterns in MEDIA_KEYWORDS.items():
 # ------------------------------------------------------------
+# Sugar fermentation
 # ------------------------------------------------------------
 def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
+    # 0) Simple "<sugar> positive/negative" and "<sugar> is positive"
     for sugar_key, field in SUGAR_FIELDS.items():
         m_simple = re.search(
+            rf"{sugar_key}\s+(positive|negative|variable|pos|neg|\+|\-)",
+            text_lc,
         )
         if m_simple:
             val = _value_from_pnv_context(m_simple.group(1))
             if val:
                 _set_if_stronger(parsed, field, val)
         m_is = re.search(
             rf"{sugar_key}\s+is\s+(positive|negative|variable|pos|neg|\+|\-)",
             text_lc,
             if val:
                 _set_if_stronger(parsed, field, val)
+    # 0b) "<sugar> fermenter" vs "non-<sugar> fermenter"
     for sugar_key, field in SUGAR_FIELDS.items():
         if re.search(rf"\b{sugar_key}\s+fermenter\b", text_lc) and not re.search(
             rf"\bnon[- ]{sugar_key}\s+fermenter\b", text_lc
         ):
             _set_if_stronger(parsed, field, "Positive")
         if re.search(rf"\bnon[- ]{sugar_key}\s+fermenter\b", text_lc):
             _set_if_stronger(parsed, field, "Negative")
     ferments_pattern = re.compile(r"ferments\s+([a-z0-9 ,;/&\-]+)")
     for m in ferments_pattern.finditer(text_lc):
         seg = m.group(1)
         neg_split = re.split(r"\bbut not\b", seg, maxsplit=1)
         pos_part = neg_split[0]
         neg_part = neg_split[1] if len(neg_split) > 1 else ""
         for sugar_key, field in SUGAR_FIELDS.items():
             if re.search(rf"\b{sugar_key}\b", pos_part):
                 _set_if_stronger(parsed, field, "Positive")
         for sugar_key, field in SUGAR_FIELDS.items():
             if re.search(rf"\b{sugar_key}\b", neg_part):
                 _set_if_stronger(parsed, field, "Negative")
+    # 2) Grouped "does not ferment X, Y and Z" (stop at but/punctuation)
     grouped_neg_pattern = re.compile(
         r"does\s+(?:not|n't)\s+ferment\s+([a-z0-9 ,;/&\-]+?)(?:\s+but\b|\.|;|,|$)"
     )
             if re.search(rf"\b{sugar_key}\b", seg):
                 _set_if_stronger(parsed, field, "Negative")
+    # 3) Single "does not ferment X"
     for sugar_key, field in SUGAR_FIELDS.items():
         if re.search(
             rf"does\s+(?:not|n't)\s+ferment\s+{sugar_key}\b", text_lc
         ):
             _set_if_stronger(parsed, field, "Negative")
+    # 4) "non-lactose fermenter"
     for sugar_key, field in SUGAR_FIELDS.items():
         if re.search(
             rf"non[- ]{sugar_key}\s+ferment(ing|er)?", text_lc
         ):
             _set_if_stronger(parsed, field, "Negative")
+    # 5) "<sugar> fermentation positive/negative" + "is positive"
     for sugar_key, field in SUGAR_FIELDS.items():
         m1 = re.search(
             rf"{sugar_key}\s+fermentation[ \-]?"
             r"(positive|negative|variable|pos|neg|\+|\-)",
                 _set_if_stronger(parsed, field, val)
                 continue
         m2 = re.search(
             rf"(positive|negative|variable|pos|neg|\+|\-)\s+"
             rf"(for\s+)?{sugar_key}\s+fermentation",
                 _set_if_stronger(parsed, field, val)
                 continue
         m3 = re.search(
             rf"{sugar_key}\s+fermentation\s+is\s+"
             r"(positive|negative|variable|pos|neg|\+|\-)",
                 _set_if_stronger(parsed, field, val)
                 continue
+    # 6) Global non-fermenter phrases
     if (
         re.search(
             r"does\s+(?:not|n't)\s+ferment\s+(carbohydrates|sugars)", text_lc
         for field in SUGAR_FIELDS.values():
             if field not in parsed or parsed[field] == UNKNOWN:
                 _set_if_stronger(parsed, field, "Negative")
 # ------------------------------------------------------------
+# Colony morphology
 # ------------------------------------------------------------
 def _parse_colony(text_lc: str, parsed: Dict[str, str]) -> None:
     m = re.search(r"colon(y|ies)\s+(are|is)\s+([a-z0-9 ,;\-]+)", text_lc)
     if m:
         desc = m.group(3).strip()
                 _set_if_stronger(parsed, "Colony Morphology", pretty)
                 return
     m2 = re.search(
         r"colonies\s+([a-z0-9 ,;\-]+?)(?:\s+on\b|\.|,)",
         text_lc,
                 _set_if_stronger(parsed, "Colony Morphology", pretty)
                 return
     m3 = re.search(
         r"(forming|forms|produces)\s+([a-z0-9 ,;\-]+?)\s+colonies",
         text_lc,
         }
     except Exception as e:
         return {
             "parsed_fields": parsed,
             "source": "rule_parser",
             "raw": original,
             "error": f"{type(e).__name__}: {e}",
+        }