EphAsad committed on
Commit
8ecb8eb
·
verified ·
1 Parent(s): 28e5df1

Update engine/parser_rules.py

Browse files
Files changed (1) hide show
  1. engine/parser_rules.py +22 -1
engine/parser_rules.py CHANGED
@@ -2,7 +2,7 @@
2
  # ------------------------------------------------------------
3
  # Rule-based core parser for microbiology descriptions.
4
  #
5
- # Stage 11F (Option A ranges + fixes):
6
  # - Always store Growth Temperature as "low//high"
7
  # • single: 37 → "37//37"
8
  # • two temps in text: min//max (e.g. "4 °C but not 45 °C" → "4//45")
@@ -12,6 +12,9 @@
12
  # - "aerobically" / "anaerobically" → Aerobic / Anaerobic
13
  # - NaCl tolerance phrases improved
14
  # - Colony morphology from "colonies dry, white and irregular on nutrient agar"
 
 
 
15
  # ------------------------------------------------------------
16
 
17
  from __future__ import annotations
@@ -210,6 +213,7 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
210
  - Nitrate reduction text
211
  - H2S production / non-production
212
  - DNase universal coverage
 
213
  """
214
  for field, keywords in CORE_BOOL_FIELDS.items():
215
  for kw in keywords:
@@ -316,6 +320,14 @@ def _parse_core_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
316
  if re.search(r"\bnon[- ]?dnase[- ]?producing\b", text_lc):
317
  _set_if_stronger(parsed, "DNase", "Negative")
318
 
 
 
 
 
 
 
 
 
319
 
320
  # ------------------------------------------------------------
321
  # Motility / Capsule / Spores
@@ -551,6 +563,7 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
551
  - "ferments glucose, mannitol and sucrose but not lactose"
552
  - "does not ferment lactose"
553
  - "non-lactose fermenter"
 
554
  - global non-fermenter phrases
555
  """
556
 
@@ -565,6 +578,14 @@ def _parse_sugars(text_lc: str, parsed: Dict[str, str]) -> None:
565
  if val:
566
  _set_if_stronger(parsed, field, val)
567
 
 
 
 
 
 
 
 
 
568
  # 1) "ferments X, Y and Z but not A, B"
569
  ferments_pattern = re.compile(r"ferments\s+([a-z0-9 ,;/&\-]+)")
570
  for m in ferments_pattern.finditer(text_lc):
 
2
  # ------------------------------------------------------------
3
  # Rule-based core parser for microbiology descriptions.
4
  #
5
+ # Stage 11F (Option A ranges + fixes) + Stage 11H additions:
6
  # - Always store Growth Temperature as "low//high"
7
  # • single: 37 → "37//37"
8
  # • two temps in text: min//max (e.g. "4 °C but not 45 °C" → "4//45")
 
12
  # - "aerobically" / "anaerobically" → Aerobic / Anaerobic
13
  # - NaCl tolerance phrases improved
14
  # - Colony morphology from "colonies dry, white and irregular on nutrient agar"
15
+ # - NEW (11H):
16
+ # • "Gelatinase positive/negative" → Gelatin Hydrolysis Positive/Negative
17
+ # • "<sugar> fermenter" → <Sugar> Fermentation = Positive
18
  # ------------------------------------------------------------
19
 
20
  from __future__ import annotations
 
213
  - Nitrate reduction text
214
  - H2S production / non-production
215
  - DNase universal coverage
216
+ - NEW (11H): explicit gelatinase → Gelatin Hydrolysis mapping
217
  """
218
  for field, keywords in CORE_BOOL_FIELDS.items():
219
  for kw in keywords:
 
320
  if re.search(r"\bnon[- ]?dnase[- ]?producing\b", text_lc):
321
  _set_if_stronger(parsed, "DNase", "Negative")
322
 
323
+ # --- NEW: Gelatinase → Gelatin Hydrolysis ---
324
+ # Explicit mapping just in case generic patterns miss it
325
+ if re.search(r"\bgelatinase\s*(positive|pos|\+)\b", text_lc):
326
+ _set_if_stronger(parsed, "Gelatin Hydrolysis", "Positive")
327
+
328
+ if re.search(r"\bgelatinase\s*(negative|neg|\-)\b", text_lc):
329
+ _set_if_stronger(parsed, "Gelatin Hydrolysis", "Negative")
330
+
331
 
332
  # ------------------------------------------------------------
333
  # Motility / Capsule / Spores
 
563
  - "ferments glucose, mannitol and sucrose but not lactose"
564
  - "does not ferment lactose"
565
  - "non-lactose fermenter"
566
+ - "<sugar> fermenter" (positive)
567
  - global non-fermenter phrases
568
  """
569
 
 
578
  if val:
579
  _set_if_stronger(parsed, field, val)
580
 
581
+ # 0b) NEW: "<sugar> fermenter" → Positive (unless "non-<sugar> fermenter")
582
+ for sugar_key, field in SUGAR_FIELDS.items():
583
+ # positive: "lactose fermenter"
584
+ if re.search(rf"\b{sugar_key}\s+fermenter\b", text_lc) and not re.search(
585
+ rf"\bnon[- ]{sugar_key}\s+fermenter\b", text_lc
586
+ ):
587
+ _set_if_stronger(parsed, field, "Positive")
588
+
589
  # 1) "ferments X, Y and Z but not A, B"
590
  ferments_pattern = re.compile(r"ferments\s+([a-z0-9 ,;/&\-]+)")
591
  for m in ferments_pattern.finditer(text_lc):