EphAsad commited on
Commit
b1deeea
·
verified ·
1 Parent(s): 8784ac2

Update engine/parser_ext.py

Browse files
Files changed (1) hide show
  1. engine/parser_ext.py +263 -218
engine/parser_ext.py CHANGED
@@ -1,27 +1,19 @@
1
  # engine/parser_ext.py
2
  # ------------------------------------------------------------
3
- # Extended parser for non-core / specialty tests.
4
  #
5
- # Focus: fields that are NOT part of the strict core schema
6
- # and usually live in data/extended_schema.json, e.g.:
 
 
 
7
  #
8
- # - CAMP, PYR, Hippurate Hydrolysis
9
- # - Bile Solubility, Bile Resistance
10
- # - Optochin, Bacitracin, Novobiocin (disc tests)
11
- # - Odour
12
- # - NaCl Tolerant (>=10%), NaCl Tolerant (>=15%)
13
- # - Lipase, Lecithinase, etc. IF present in extended_schema
14
- #
15
- # It returns:
16
- # {
17
- # "parsed_fields": { field: value, ... },
18
- # "source": "extended_parser",
19
- # "raw": original_text,
20
- # "error": optional_error_message
21
- # }
22
- #
23
- # Stage 11B: safer alias usage, better coverage for disc tests
24
- # and extended biochemical tests, without touching core schema.
25
  # ------------------------------------------------------------
26
 
27
  from __future__ import annotations
@@ -31,268 +23,321 @@ import os
31
  import re
32
  from typing import Dict, Any, List
33
 
 
 
34
 
35
  UNKNOWN = "Unknown"
36
 
37
- EXTENDED_SCHEMA_PATH = os.path.join("data", "extended_schema.json")
38
- ALIAS_MAP_PATH = os.path.join("data", "alias_maps.json")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
  # ------------------------------------------------------------
42
- # Loading helpers
43
  # ------------------------------------------------------------
44
 
45
- def _load_extended_schema() -> Dict[str, Any]:
46
- if not os.path.exists(EXTENDED_SCHEMA_PATH):
47
  return {}
48
  try:
49
- with open(EXTENDED_SCHEMA_PATH, "r", encoding="utf-8") as f:
50
  obj = json.load(f)
51
- return obj if isinstance(obj, dict) else {}
52
  except Exception:
53
  return {}
54
 
55
 
56
- def _load_alias_maps() -> Dict[str, str]:
57
- if not os.path.exists(ALIAS_MAP_PATH):
 
 
 
 
 
58
  return {}
59
  try:
60
- with open(ALIAS_MAP_PATH, "r", encoding="utf-8") as f:
61
  obj = json.load(f)
62
- return obj if isinstance(obj, dict) else {}
 
 
63
  except Exception:
64
  return {}
65
 
66
 
67
- def _apply_alias(field: str, value: str, alias_maps: Dict[str, str]) -> str:
68
  """
69
- Apply alias maps in a SAFE way:
70
-
71
- - key format expected: "Field:Value" -> "NormalizedValue"
72
- - if no match, return original value
73
- - we NEVER map values to some other field name here.
74
  """
75
  key = f"{field}:{value}"
76
- mapped = alias_maps.get(key)
77
- if mapped:
78
- return mapped
 
79
  return value
80
 
81
 
82
  # ------------------------------------------------------------
83
- # Generic helpers
84
  # ------------------------------------------------------------
85
 
86
- def _clean_text(text: str) -> str:
87
- return " ".join(text.split())
88
-
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- def _norm(s: str) -> str:
91
- return s.strip().lower()
 
 
 
 
 
92
 
 
93
 
94
- def _set_if_stronger(parsed: Dict[str, str], field: str, value: str) -> None:
95
- if not value:
96
- return
97
- if field not in parsed or parsed[field] == UNKNOWN:
98
- parsed[field] = value
99
 
 
 
 
 
 
 
 
 
100
 
101
- def _value_from_pnv_context(segment: str) -> str | None:
102
- seg = _norm(segment)
103
- if seg in ["positive", "pos", "+"]:
104
- return "Positive"
105
- if seg in ["negative", "neg", "-"]:
106
  return "Negative"
107
- if seg in ["variable", "var", "v"]:
108
- return "Variable"
109
- return None
110
-
111
-
112
- # ------------------------------------------------------------
113
- # Extended test patterns
114
- # ------------------------------------------------------------
115
-
116
- EXT_BOOL_FIELDS: Dict[str, List[str]] = {
117
- # extended enum_PNV style tests that often appear in gold tests
118
- "CAMP": ["camp", "camp test"],
119
- "PYR": ["pyr", "pyr test"],
120
- "Hippurate Hydrolysis": ["hippurate", "hippurate hydrolysis"],
121
- "Bile Solubility": ["bile soluble", "bile solubility"],
122
- "Bile Resistance": ["bile resistant", "bile resistance"],
123
- "Lipase": ["lipase"],
124
- "Lecithinase": ["lecithinase"],
125
- "Casein Hydrolysis": ["casein hydrolysis"],
126
- "Tyrosine Hydrolysis": ["tyrosine hydrolysis"],
127
- }
128
 
129
- # Disc tests with sensitive/resistant semantics
130
- DISC_TESTS = {
131
- "Optochin": ["optochin"],
132
- "Bacitracin": ["bacitracin"],
133
- "Novobiocin": ["novobiocin"],
134
- }
135
 
136
- # NaCl tolerance (high salt)
137
- EXT_SALT_FIELDS = {
138
- "NaCl Tolerant (>=10%)": ["10% nacl", "10 % nacl"],
139
- "NaCl Tolerant (>=15%)": ["15% nacl", "15 % nacl"],
140
- }
141
 
142
- # Odour
143
- ODOUR_VALUES = {
144
- "Fruity": ["fruity odour", "fruity odor"],
145
- "Horse": ["horse odour", "horse odor", "horse stable smell"],
146
- "Foul": ["foul odour", "foul odor"],
147
- "Butyric": ["butyric odour", "butyric odor"],
148
- "Earthy": ["earthy odour", "earthy odor"],
149
- }
150
 
151
 
152
  # ------------------------------------------------------------
153
- # Parsing functions
154
  # ------------------------------------------------------------
155
 
156
- def _parse_ext_bool_tests(text_lc: str, parsed: Dict[str, str]) -> None:
157
- for field, keywords in EXT_BOOL_FIELDS.items():
158
- for kw in keywords:
159
- # "CAMP test positive"
160
- m1 = re.search(rf"{re.escape(kw)}[ \-]?(positive|negative|variable|pos|neg|\+|\-)", text_lc)
161
- if m1:
162
- val = _value_from_pnv_context(m1.group(1))
163
- if val:
164
- _set_if_stronger(parsed, field, val)
165
- break
166
-
167
- # "positive for CAMP test"
168
- m2 = re.search(
169
- rf"(positive|negative|variable|pos|neg|\+|\-)\s+(for\s+)?{re.escape(kw)}",
170
- text_lc,
171
- )
172
- if m2:
173
- val = _value_from_pnv_context(m2.group(1))
174
- if val:
175
- _set_if_stronger(parsed, field, val)
176
- break
177
-
178
-
179
- def _parse_disc_tests(text_lc: str, parsed: Dict[str, str]) -> None:
180
  """
181
- Disc tests like Optochin, Bacitracin, Novobiocin.
182
-
183
- Convention:
184
- - "sensitive" -> Positive
185
- - "susceptible" -> Positive
186
- - "resistant" -> Negative
187
  """
188
- for field, keywords in DISC_TESTS.items():
189
- for kw in keywords:
190
- # "optochin sensitive" / "optochin resistant"
191
- m = re.search(
192
- rf"{re.escape(kw)}[ \-]?(sensitive|susceptible|resistant)",
193
- text_lc,
194
- )
195
- if not m:
196
- continue
197
-
198
- word = m.group(1).lower()
199
- if word in ["sensitive", "susceptible"]:
200
- _set_if_stronger(parsed, field, "Positive")
201
- elif word == "resistant":
202
- _set_if_stronger(parsed, field, "Negative")
203
- break
204
-
205
-
206
- def _parse_salt_tolerance(text_lc: str, parsed: Dict[str, str]) -> None:
207
- for field, patterns in EXT_SALT_FIELDS.items():
208
- for p in patterns:
209
- if p in text_lc:
210
- # If explicitly says "tolerant" or "growth at"
211
- if re.search(rf"(tolerant|grows at|growth at)\s*{re.escape(p)}", text_lc):
212
- _set_if_stronger(parsed, field, "Positive")
213
- # If "no growth at 15% NaCl" etc.
214
- if re.search(rf"no growth at\s*{re.escape(p)}", text_lc):
215
- _set_if_stronger(parsed, field, "Negative")
216
-
217
-
218
- def _parse_odour(text_lc: str, parsed: Dict[str, str]) -> None:
219
- for value, patterns in ODOUR_VALUES.items():
220
- for p in patterns:
221
- if p in text_lc:
222
- _set_if_stronger(parsed, "Odour", value)
223
- break
224
-
225
-
226
- def _parse_misc_extended(text_lc: str, parsed: Dict[str, str]) -> None:
227
  """
228
- Place-holder for any additional extended patterns you want.
229
- For now, we keep it minimal to avoid accidental conflicts.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  """
231
- # Example: "acid fast" partial positivity is handled as extended field
232
- if "partial acid fast" in text_lc or "partially acid fast" in text_lc:
233
- _set_if_stronger(parsed, "Acid Fast", "Partial")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
 
236
  # ------------------------------------------------------------
237
- # PUBLIC API
238
  # ------------------------------------------------------------
239
 
240
  def parse_text_extended(text: str) -> Dict[str, Any]:
241
  """
242
- Parse extended (non-core) tests from description, guided by extended_schema.json.
 
 
 
 
 
243
 
244
  Returns:
245
  {
246
- "parsed_fields": {...},
247
  "source": "extended_parser",
248
- "raw": original_text,
249
- "error": optional_error
250
  }
251
  """
252
- original = text or ""
253
- text_clean = _clean_text(original)
254
- text_lc = text_clean.lower()
 
 
 
 
 
 
255
 
256
  parsed: Dict[str, str] = {}
257
 
258
- try:
259
- ext_schema = _load_extended_schema()
260
- alias_maps = _load_alias_maps()
261
-
262
- # Run pattern-based extraction
263
- _parse_ext_bool_tests(text_lc, parsed)
264
- _parse_disc_tests(text_lc, parsed)
265
- _parse_salt_tolerance(text_lc, parsed)
266
- _parse_odour(text_lc, parsed)
267
- _parse_misc_extended(text_lc, parsed)
268
-
269
- # Apply alias maps (non-destructive, only value changes)
270
- if alias_maps:
271
- for field in list(parsed.keys()):
272
- val = parsed[field]
273
- mapped_val = _apply_alias(field, val, alias_maps)
274
- parsed[field] = mapped_val
275
-
276
- # Filter out any fields that are clearly core and should
277
- # NOT live in extended land (safety).
278
- # We let the main engine ignore unknown fields anyway.
279
- if ext_schema:
280
- valid_ext_fields = set(ext_schema.keys())
281
- parsed = {
282
- f: v for (f, v) in parsed.items()
283
- if f in valid_ext_fields
284
- }
285
 
286
- return {
287
- "parsed_fields": parsed,
288
- "source": "extended_parser",
289
- "raw": original,
290
- }
291
 
292
- except Exception as e:
293
- return {
294
- "parsed_fields": parsed,
295
- "source": "extended_parser",
296
- "raw": original,
297
- "error": f"{type(e).__name__}: {e}",
298
- }
 
 
1
  # engine/parser_ext.py
2
  # ------------------------------------------------------------
3
+ # Extended test parser (Stage 11C)
4
  #
5
+ # - Focuses on *extended* tests (disc tests, rare biochemicals, etc.)
6
+ # - Uses extended_schema.json dynamically
7
+ # - Ignores core DB fields (those are handled by parser_rules)
8
+ # - Adds robust patterns for:
9
+ # CAMP, PYR, Optochin, Bacitracin, Novobiocin
10
  #
11
+ # Returns:
12
+ # {
13
+ # "parsed_fields": { ... },
14
+ # "source": "extended_parser",
15
+ # "raw": original_text
16
+ # }
 
 
 
 
 
 
 
 
 
 
 
17
  # ------------------------------------------------------------
18
 
19
  from __future__ import annotations
 
23
  import re
24
  from typing import Dict, Any, List
25
 
26
# Paths to the optional JSON side-files.  Both loaders below are tolerant:
# a missing or malformed file simply yields {} rather than an exception.
EXTENDED_SCHEMA_PATH = os.path.join("data", "extended_schema.json")
ALIAS_MAP_PATH = os.path.join("data", "alias_maps.json")

# Sentinel returned when no confident value could be extracted from text.
UNKNOWN = "Unknown"

# These are the CORE / DB fields that should *not* be treated as extended.
# (Must match your bacteria_db.xlsx columns.)
# NOTE(review): entries below are compared verbatim against schema field
# names, so spelling/casing must match the DB exactly — in particular
# "Ornitihine Decarboxylase" (unusual spelling) and the lower-case "d" in
# "Arginine dihydrolase" look like they mirror existing column names; confirm.
CORE_FIELDS = {
    "Genus",
    "Species",
    "Gram Stain",
    "Shape",
    "Colony Morphology",
    "Haemolysis",
    "Haemolysis Type",
    "Motility",
    "Capsule",
    "Spore Formation",
    "Growth Temperature",
    "Oxygen Requirement",
    "Media Grown On",
    "Catalase",
    "Oxidase",
    "Indole",
    "Urease",
    "Citrate",
    "Methyl Red",
    "VP",
    "H2S",
    "DNase",
    "ONPG",
    "Coagulase",
    "Lipase Test",
    "Nitrate Reduction",
    "NaCl Tolerant (>=6%)",
    "Lysine Decarboxylase",
    "Ornitihine Decarboxylase",
    "Arginine dihydrolase",
    "Gelatin Hydrolysis",
    "Esculin Hydrolysis",
    "Glucose Fermentation",
    "Lactose Fermentation",
    "Sucrose Fermentation",
    "Mannitol Fermentation",
    "Sorbitol Fermentation",
    "Maltose Fermentation",
    "Xylose Fermentation",
    "Rhamnose Fermentation",
    "Arabinose Fermentation",
    "Raffinose Fermentation",
    "Trehalose Fermentation",
    "Inositol Fermentation",
}
79
 
80
 
81
  # ------------------------------------------------------------
82
+ # Helpers: load extended schema & alias maps
83
  # ------------------------------------------------------------
84
 
85
def _load_extended_schema(path: str = EXTENDED_SCHEMA_PATH) -> Dict[str, Any]:
    """Load extended_schema.json from *path*.

    Returns the parsed dict, or {} when the file is absent, unreadable,
    malformed JSON, or not a JSON object at the top level.
    """
    if not os.path.exists(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as handle:
            data = json.load(handle)
    except Exception:
        # Best-effort loader: a broken schema file must never crash parsing.
        return {}
    return data if isinstance(data, dict) else {}
94
 
95
 
96
def _load_alias_map(path: str = ALIAS_MAP_PATH) -> Dict[str, str]:
    """Load alias_maps.json from *path*.

    alias_maps.json is assumed to be a simple dict like:
        { "Field:raw_value": "canonical_value", ... }
    The file is optional; {} is returned when it is missing, unreadable,
    not valid JSON, or not a JSON object.
    """
    if not os.path.exists(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as handle:
            data = json.load(handle)
    except Exception:
        # Conservative: alias support is optional, never fatal.
        return {}
    if not isinstance(data, dict):
        return {}
    return data
112
 
113
 
114
+ def _apply_field_value_alias(field: str, value: str, alias_map: Dict[str, str]) -> str:
115
  """
116
+ Apply alias mapping of the form:
117
+ "Field:raw_value" -> "canonical"
118
+ Case-insensitive on the key; value returned as-is if no mapping.
 
 
119
  """
120
  key = f"{field}:{value}"
121
+ key_lower = key.lower()
122
+ for k, v in alias_map.items():
123
+ if k.lower() == key_lower:
124
+ return v
125
  return value
126
 
127
 
128
  # ------------------------------------------------------------
129
+ # Value normalisation helpers
130
  # ------------------------------------------------------------
131
 
132
+ def _bool_from_tokens(tokens: List[str]) -> str:
133
+ """
134
+ Map "positive/sensitive/susceptible" vs "negative/resistant"
135
+ into Positive / Negative where appropriate.
136
+ """
137
+ t = " ".join(tokens).lower()
138
+
139
+ # Strong negative signals
140
+ neg_tokens = [
141
+ "negative", "no", "not", "resistant", "no zone",
142
+ "no growth", "fails to", "does not"
143
+ ]
144
+ if any(nt in t for nt in neg_tokens):
145
+ return "Negative"
146
 
147
+ # Strong positive signals
148
+ pos_tokens = [
149
+ "positive", "pos", "sensitive", "susceptible",
150
+ "clear zone", "zone of inhibition"
151
+ ]
152
+ if any(pt in t for pt in pos_tokens):
153
+ return "Positive"
154
 
155
+ return UNKNOWN
156
 
 
 
 
 
 
157
 
158
+ def _disc_result_from_phrase(phrase: str) -> str:
159
+ """
160
+ For disc tests like Novobiocin / Optochin / Bacitracin, interpret:
161
+ - 'sensitive', 'susceptible' as Positive
162
+ - 'resistant', 'no zone' as Negative
163
+ - default -> Unknown
164
+ """
165
+ ph = phrase.lower()
166
 
167
+ if any(w in ph for w in ["resistant", "no zone", "no inhibition", "no clear zone"]):
 
 
 
 
168
  return "Negative"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
+ if any(w in ph for w in ["sensitive", "susceptible", "zone of inhibition", "clear zone"]):
171
+ return "Positive"
 
 
 
 
172
 
173
+ # If explicit 'positive'/'negative' appears, handle that
174
+ if "positive" in ph:
175
+ return "Positive"
176
+ if "negative" in ph:
177
+ return "Negative"
178
 
179
+ return UNKNOWN
 
 
 
 
 
 
 
180
 
181
 
182
  # ------------------------------------------------------------
183
+ # Core pattern logic for extended tests
184
  # ------------------------------------------------------------
185
 
186
+ def _parse_disc_tests(text: str, parsed: Dict[str, str]) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  """
188
+ Handle disc tests:
189
+ - Optochin
190
+ - Bacitracin
191
+ - Novobiocin
192
+ with phrasing like 'optochin sensitive', 'bacitracin resistant', etc.
 
193
  """
194
+ lower = text.lower()
195
+
196
+ disc_fields = ["Optochin", "Bacitracin", "Novobiocin"]
197
+
198
+ for test_name in disc_fields:
199
+ key = test_name.lower()
200
+ # Find segments surrounding the keyword
201
+ for m in re.finditer(rf"\b{re.escape(key)}\b[^\.,;]*", lower):
202
+ segment = lower[m.start():m.end()]
203
+ val = _disc_result_from_phrase(segment)
204
+ if val != UNKNOWN:
205
+ parsed[test_name] = val
206
+
207
+ # Also handle "<test> test positive/negative"
208
+ for m in re.finditer(rf"\b{re.escape(key)}\s+test[^\.,;]*", lower):
209
+ segment = lower[m.start():m.end()]
210
+ val = _disc_result_from_phrase(segment)
211
+ if val != UNKNOWN:
212
+ parsed[test_name] = val
213
+
214
+
215
def _parse_simple_PNV_test(
    text: str,
    test_name: str,
    parsed: Dict[str, str],
    extra_keywords: List[str] | None = None,
) -> None:
    """Generic Positive/Negative parser for named tests (CAMP, PYR, ...).

    Looks for clauses like 'CAMP positive' or 'PYR test negative' and
    records the verdict in ``parsed[test_name]``; later matches overwrite
    earlier ones.  *extra_keywords* adds synonym triggers such as
    'CAMP reaction' or 'PYR activity'.
    """
    lowered = text.lower()
    label = re.escape(test_name.lower())

    patterns = [
        rf"\b{label}\b[^\.,;]*",       # "<name> positive/negative ..."
        rf"\b{label}\s+test[^\.,;]*",  # "<name> test positive/negative ..."
    ]
    # Optional synonym triggers, scanned after the direct patterns.
    patterns.extend(
        rf"\b{re.escape(kw.lower())}\b[^\.,;]*" for kw in (extra_keywords or [])
    )

    for pattern in patterns:
        for match in re.finditer(pattern, lowered):
            verdict = _bool_from_tokens(match.group(0).split())
            if verdict != UNKNOWN:
                parsed[test_name] = verdict
+ parsed[test_name] = val
258
+
259
+
260
def _parse_extended_from_schema(
    text: str,
    ext_schema: Dict[str, Any],
    alias_map: Dict[str, str],
    parsed: Dict[str, str],
) -> None:
    """Schema-driven extraction pass, writing results into *parsed* in place.

    For every field in extended_schema.json declared with
    value_type == "enum_PNV" that is not a core DB column, look for
    '<field> positive/negative/variable' style clauses and store the
    alias-normalised value.
    """
    lowered = text.lower()

    for field_name, meta in ext_schema.items():
        # Skip malformed entries, non-PNV value types, and core DB fields
        # (core tests are owned by the main parser, never by this one).
        if not isinstance(meta, dict):
            continue
        if meta.get("value_type") != "enum_PNV":
            continue
        if field_name in CORE_FIELDS:
            continue

        pattern = rf"\b{re.escape(field_name.lower())}\b[^\.,;]*"
        for match in re.finditer(pattern, lowered):
            verdict = _bool_from_tokens(match.group(0).split())
            if verdict == UNKNOWN:
                continue
            parsed[field_name] = _apply_field_value_alias(
                field_name, verdict, alias_map
            )
294
 
295
 
296
  # ------------------------------------------------------------
297
+ # MAIN ENTRY
298
  # ------------------------------------------------------------
299
 
300
def parse_text_extended(text: str) -> Dict[str, Any]:
    """Parse extended-only (non-core) tests from a free-text description.

    Intentionally conservative:
      - only sets a field when the text gives a reasonably clear signal
      - never overrides core parser behaviour directly
      - cooperates with alias_maps.json and extended_schema.json

    Returns:
        {
            "parsed_fields": { field: value, ... },
            "source": "extended_parser",
            "raw": original_text
        }
    """
    # Empty / None input: return the empty result envelope unchanged.
    if not text:
        return {
            "parsed_fields": {},
            "source": "extended_parser",
            "raw": text or "",
        }

    schema = _load_extended_schema(EXTENDED_SCHEMA_PATH)
    aliases = _load_alias_map(ALIAS_MAP_PATH)

    fields: Dict[str, str] = {}

    # 1) Disc tests (Novobiocin / Optochin / Bacitracin) with rich language.
    _parse_disc_tests(text, fields)

    # 2) Named P/N/V tests that commonly appear in gold tests.
    for name, synonyms in (
        ("CAMP", ["CAMP reaction"]),
        ("PYR", ["PYR activity"]),
        ("Hippurate Hydrolysis", ["hippurate"]),
    ):
        _parse_simple_PNV_test(text, name, fields, extra_keywords=synonyms)

    # 3) Any other enum_PNV extended tests declared in extended_schema.json.
    _parse_extended_from_schema(text, schema, aliases, fields)

    return {
        "parsed_fields": fields,
        "source": "extended_parser",
        "raw": text,
    }