Update training/field_weight_trainer.py

training/field_weight_trainer.py  (+136 −84)
```diff
@@ -2,9 +2,22 @@
 # ------------------------------------------------------------
 # Stage 12A — Train Per-Field Parser Weights from Gold Tests
 #
-#
-#
-#
+# Produces:
+#     data/field_weights.json
+#
+# This script computes reliability scores for:
+#   - parser_rules
+#   - parser_ext
+#   - parser_llm
+#
+# and outputs:
+#   {
+#     "global": { ... },
+#     "fields": { field -> weights },
+#     "meta": { ... }
+#   }
+#
+# These weights are used by parser_fusion (Stage 12B).
 # ------------------------------------------------------------

 from __future__ import annotations
```
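The header now documents the artifact this stage emits. For orientation, the written file should look roughly like the sketch below; the key names follow the header comment, but every number and the field name are invented for illustration:

```python
# Illustrative shape of data/field_weights.json -- values are made up,
# and "invoice_date" is a hypothetical field name.
example_output = {
    "global": {"rules": 0.45, "extended": 0.35, "llm": 0.20},
    "fields": {
        "invoice_date": {"rules": 0.55, "extended": 0.30, "llm": 0.15, "support": 12},
    },
    "meta": {
        "total_samples": 40,
        "missing_penalty": 0.5,  # placeholder; the real value is the module's MISSING_PENALTY
        "smoothing": 0.01,       # placeholder; the real value is the module's SMOOTHING
        "include_llm": True,
    },
}
```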
```diff
@@ -20,11 +33,11 @@ from typing import Any, Dict, List, Optional, Tuple
 from engine.parser_rules import parse_text_rules
 from engine.parser_ext import parse_text_extended

-# LLM parser (optional
+# LLM parser (optional)
 try:
     from engine.parser_llm import parse_llm as parse_text_llm_local
 except Exception:
-    parse_text_llm_local = None
+    parse_text_llm_local = None  # gracefully degrade if LLM unavailable


 # ------------------------------------------------------------
```
```diff
@@ -63,7 +76,9 @@ class FieldStats:
         if self.total() == 0:
             return 0.0
         denom = self.correct + self.wrong + missing_penalty * self.missing
-        return self.correct / denom
+        if denom == 0:
+            return 0.0
+        return self.correct / denom


 # ------------------------------------------------------------
```
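The new guard matters because `missing_penalty` can plausibly be 0: a parser that only ever misses a field would then make `denom` zero even though `total()` is positive. A quick worked example of the score, assuming `FieldStats` exposes the `correct`/`wrong`/`missing` counters used above:

```python
# score = correct / (correct + wrong + missing_penalty * missing)
correct, wrong, missing = 8, 1, 2
missing_penalty = 0.5  # hypothetical; the module's constant is MISSING_PENALTY

denom = correct + wrong + missing_penalty * missing  # 8 + 1 + 1.0 = 10.0
score = correct / denom                              # 0.8
```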
```diff
@@ -88,64 +103,58 @@ def _extract_text_and_expected(test_obj: Dict[str, Any]) -> Tuple[str, Dict[str,
         or test_obj.get("raw")
         or ""
     )
-
+    if not isinstance(text, str):
+        text = str(text)

     expected: Dict[str, str] = {}

-
-
-
-
-
+    if isinstance(test_obj.get("expected"), dict):
+        for k, v in test_obj["expected"].items():
+            expected[str(k)] = str(v)
+        return text, expected
+
+    if isinstance(test_obj.get("expected_core"), dict):
+        for k, v in test_obj["expected_core"].items():
+            expected[str(k)] = str(v)
+
+    if isinstance(test_obj.get("expected_extended"), dict):
+        for k, v in test_obj["expected_extended"].items():
+            expected[str(k)] = str(v)

     return text, expected


 # ------------------------------------------------------------
 # Parser Execution
 # ------------------------------------------------------------

 def _get_parser_predictions(text: str, include_llm: bool = True) -> Dict[str, Dict[str, str]]:
     results: Dict[str, Dict[str, str]] = {}

-    # 1) Rules
     r = parse_text_rules(text)
-
-    results["rules"] = rules_fields
+    results["rules"] = dict(r.get("parsed_fields", {}))

-    # 2) Extended
     e = parse_text_extended(text)
-
-    results["extended"] = ext_fields
+    results["extended"] = dict(e.get("parsed_fields", {}))

-
-    llm_fields: Dict[str, str] = {}
+    llm_values: Dict[str, str] = {}
     if include_llm and parse_text_llm_local is not None:
         try:
-
-
-            merged_existing.update(ext_fields)
-
-            llm_out = parse_text_llm_local(
-                text,
-                existing_fields=merged_existing,
-            )
-
-            if isinstance(llm_out, dict):
-                llm_fields = dict(llm_out.get("parsed_fields", {}))
+            llm_out = parse_text_llm_local(text)
+            llm_values = dict(llm_out.get("parsed_fields", {}))
         except Exception:
-
-    results["llm"] = llm_fields
+            llm_values = {}
+    results["llm"] = llm_values

     return results


 def _outcome_for_field(expected_val: str, predicted_val: Optional[str]) -> ParserOutcome:
     if predicted_val is None:
-        return ParserOutcome(None, False, False, True)
+        return ParserOutcome(prediction=None, correct=False, wrong=False, missing=True)
     if predicted_val == expected_val:
-        return ParserOutcome(predicted_val, True, False, False)
-    return ParserOutcome(predicted_val, False, True, False)
+        return ParserOutcome(prediction=predicted_val, correct=True, wrong=False, missing=False)
+    return ParserOutcome(prediction=predicted_val, correct=False, wrong=True, missing=False)


 # ------------------------------------------------------------
```
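The extractor above implies a gold-test record shape. The sketch below is a guess reconstructed from the keys the code probes (`text`/`raw`, then `expected`, `expected_core`, `expected_extended`); the values are invented:

```python
# Hypothetical gold-test record. A flat "expected" dict short-circuits;
# otherwise "expected_core" and "expected_extended" are merged.
sample = {
    "text": "Invoice 42, dated 2024-01-15",  # "raw" is accepted as a fallback key
    "expected": {
        "invoice_number": "42",
        "invoice_date": "2024-01-15",
    },
}
```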
```diff
@@ -158,6 +167,7 @@ def _compute_stats_from_gold(
 ):
     field_stats = defaultdict(lambda: defaultdict(FieldStats))
     global_stats = defaultdict(FieldStats)
+
     total_samples = 0

     for sample in gold_tests:
```
```diff
@@ -169,67 +179,85 @@
         preds = _get_parser_predictions(text, include_llm=include_llm)

         for field, expected_val in expected.items():
-
+            expected_val = str(expected_val)
+            for parser_name in ["rules", "extended", "llm"]:
                 if parser_name == "llm" and not include_llm:
                     continue

                 pred_val = preds.get(parser_name, {}).get(field)
+
                 outcome = _outcome_for_field(expected_val, pred_val)

                 fs = field_stats[field][parser_name]
-                gs = global_stats[parser_name]
-
                 if outcome.correct:
                     fs.correct += 1
-                elif outcome.wrong:
+                if outcome.wrong:
                     fs.wrong += 1
-                else:
+                if outcome.missing:
                     fs.missing += 1
+
+                gs = global_stats[parser_name]
+                if outcome.correct:
+                    gs.correct += 1
+                if outcome.wrong:
+                    gs.wrong += 1
+                if outcome.missing:
                     gs.missing += 1

     return field_stats, global_stats, total_samples


-
-
-# ------------------------------------------------------------
-
-def _normalise(weights: Dict[str, float]) -> Dict[str, float]:
-    adjusted = {k: max(SMOOTHING, v) for k, v in weights.items()}
+def _normalise(weights: Dict[str, float], smoothing: float = SMOOTHING) -> Dict[str, float]:
+    adjusted = {k: max(smoothing, v) for k, v in weights.items()}
     total = sum(adjusted.values())
-    return {k: v / total for k, v in adjusted.items()}
+    if total <= 0:
+        n = len(adjusted)
+        return {k: 1.0 / n for k in adjusted}
+    return {k: v / total for k, v in adjusted.items()}


 def _build_weights_json(
-
-
-
-
-
+    field_stats,
+    global_stats,
+    total_samples,
+    include_llm=True,
+):
+    # Global scores
+    raw_global = {}
+    for parser_name, stats in global_stats.items():
+        if parser_name == "llm" and not include_llm:
+            continue
+        raw_global[parser_name] = stats.score(MISSING_PENALTY)

     global_weights = _normalise(raw_global)
+
+    # Per-field
     fields_block = {}

-    for
-        raw_scores = {
-
-
-
-        }
-        support = sum(s.total() for s in stats_dict.values())
-
-        weights = (
-            _normalise(raw_scores)
-            if support >= 5
-            else _normalise({
-                p: 0.7 * global_weights.get(p, 0.0) + 0.3 * raw_scores.get(p, 0.0)
-                for p in global_weights
-            })
-        )
-
-        fields_block[field] = {**weights, "support": support}
+    for field_name, stats_dict in field_stats.items():
+        raw_scores = {}
+        total_support = 0
+
+        for parser_name, stats in stats_dict.items():
+            if parser_name == "llm" and not include_llm:
+                continue
+            raw_scores[parser_name] = stats.score(MISSING_PENALTY)
+            total_support += stats.total()
+
+        if total_support < 5:
+            # low support → blend global + local
+            local_norm = _normalise(raw_scores)
+            mixed = {}
+            for p in global_weights:
+                mixed[p] = 0.7 * global_weights[p] + 0.3 * local_norm.get(p, global_weights[p])
+            field_w = _normalise(mixed)
+        else:
+            field_w = _normalise(raw_scores)
+
+        fields_block[field_name] = {
+            **field_w,
+            "support": total_support,
+        }

     return {
         "global": global_weights,
```
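The low-support branch is worth a numeric sanity check: with fewer than five gold observations the local scores are noisy, so they contribute only 30% against a 70% global prior, and a parser absent from the local scores falls back to its own global weight. With invented numbers:

```python
global_weights = {"rules": 0.5, "extended": 0.3, "llm": 0.2}
local_norm = {"rules": 0.2, "extended": 0.8}  # llm never fired for this field

mixed = {
    p: 0.7 * global_weights[p] + 0.3 * local_norm.get(p, global_weights[p])
    for p in global_weights
}
# rules:    0.7*0.5 + 0.3*0.2 = 0.41
# extended: 0.7*0.3 + 0.3*0.8 = 0.45
# llm:      0.7*0.2 + 0.3*0.2 = 0.20
# _normalise then rescales so the weights sum to 1 (total here is 1.06).
```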
```diff
@@ -237,8 +265,8 @@ def _build_weights_json(field_stats, global_stats, total_samples, include_llm=Tr
         "meta": {
             "total_samples": total_samples,
             "missing_penalty": MISSING_PENALTY,
+            "smoothing": SMOOTHING,
             "include_llm": include_llm,
-            "llm_mode": "repair-only",
         },
     }
```
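Per the header, parser_fusion (Stage 12B) consumes this file. Its real API is not part of this diff; the snippet below is only a guessed illustration of the obvious lookup pattern, a per-field weight with a global fallback:

```python
import json

with open("data/field_weights.json", encoding="utf-8") as f:
    weights = json.load(f)

def weight_for(field: str, parser: str) -> float:
    # Hypothetical helper: prefer the per-field weight, fall back to global.
    block = weights["fields"].get(field, {})
    return block.get(parser, weights["global"].get(parser, 0.0))
```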
```diff
@@ -252,14 +280,28 @@ def train_field_weights(
     out_path: str = DEFAULT_OUT_PATH,
     include_llm: bool = False,
 ):
+    print(f"[12A] Loading gold tests: {gold_path}")
     gold = _load_gold_tests(gold_path)
-
-
+    print(f"[12A] {len(gold)} gold samples loaded")
+
+    field_stats, global_stats, total_samples = _compute_stats_from_gold(
+        gold, include_llm=include_llm
+    )
+
+    print("[12A] Computing weights...")
+    weights = _build_weights_json(
+        field_stats, global_stats, total_samples, include_llm=include_llm
+    )

-    os.
+    out_dir = os.path.dirname(out_path)
+    if out_dir and not os.path.exists(out_dir):
+        os.makedirs(out_dir, exist_ok=True)
+
+    print(f"[12A] Writing: {out_path}")
     with open(out_path, "w", encoding="utf-8") as f:
-        json.dump(weights, f, indent=2)
+        json.dump(weights, f, indent=2, ensure_ascii=False)

+    print("[12A] Done.")
     return weights


```
```diff
@@ -267,12 +309,22 @@
 # CLI
 # ------------------------------------------------------------

-def
-    p = argparse.ArgumentParser()
+def _parse_args(argv=None):
+    p = argparse.ArgumentParser(description="Stage 12A — Train parser weights")
+    p.add_argument("--gold", type=str, default=DEFAULT_GOLD_PATH)
+    p.add_argument("--out", type=str, default=DEFAULT_OUT_PATH)
     p.add_argument("--include-llm", action="store_true")
-
-
+    return p.parse_args(argv)
+
+
+def main(argv=None):
+    args = _parse_args(argv)
+    train_field_weights(
+        gold_path=args.gold,
+        out_path=args.out,
+        include_llm=args.include_llm,
+    )


 if __name__ == "__main__":
     main()
```
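With the new flags, invocation is straightforward. The module path below assumes the repo layout implied by the file path in the header, and the default locations come from `DEFAULT_GOLD_PATH`/`DEFAULT_OUT_PATH`, which this diff does not show:

```python
# Equivalent to: python -m training.field_weight_trainer --include-llm
from training.field_weight_trainer import train_field_weights

weights = train_field_weights(include_llm=True)
print(weights["meta"])
```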