Spaces:

OrganizedProgrammers
/

ApplyCRs

Sleeping

App Files Files Community

heymenn committed 27 days ago

Commit

2b96123

1 Parent(s): b5fc740

fix minor errors

Browse files

Files changed (6) hide show

scripts/cr_parser.py +63 -7
scripts/etsi_client.py +24 -9
scripts/fetch_crs.py +13 -4
scripts/finalize_ts.py +20 -21
scripts/orchestrate_cr.py +3 -2
scripts/ts_applicator.py +317 -70

scripts/cr_parser.py CHANGED Viewed

@@ -57,6 +57,46 @@ def _style_val(p_elem):
         return None
     return pStyle.get(qn('w:val'))
 def _is_rpr_ins(ins_elem):
     """True if w:ins is inside w:rPr — a formatting change, not a content insertion."""
     p = ins_elem.getparent()
@@ -185,7 +225,7 @@ def _extract_inline_replacements(p_elem):
 # ── Table change extraction ───────────────────────────────────────────────────
-def _parse_table(tbl_elem, changes, section_heading=''):
     header = _table_header(tbl_elem)
     header_key = header[:3]  # first 3 columns enough for matching
     rows = tbl_elem.findall(qn('w:tr'))
@@ -195,14 +235,18 @@ def _parse_table(tbl_elem, changes, section_heading=''):
         # ── Tracked row insertion ─────────────────────────────────────────
         if trPr is not None and trPr.find(qn('w:ins')) is not None:
-            # Find preceding stable row for anchor
-            after_anchor = ''
             for prev_idx in range(tr_idx - 1, -1, -1):
                 prev_tr = rows[prev_idx]
                 prev_trPr = prev_tr.find(qn('w:trPr'))
                 if prev_trPr is None or prev_trPr.find(qn('w:ins')) is None:
-                    after_anchor = _row_col0(prev_tr)
-                    break
             cells = []
             for tc in tr.findall(qn('w:tc')):
@@ -244,7 +288,9 @@ def _parse_table(tbl_elem, changes, section_heading=''):
                     'kind': 'table_row',
                     'table_header': header_key,
                     'after_row_anchor': after_anchor,
                     'section_heading': section_heading,
                 },
                 'cells': cells,
             })
@@ -266,6 +312,7 @@ def _parse_table(tbl_elem, changes, section_heading=''):
                             'row_anchor': row_anchor,
                             'col_idx': col_idx,
                             'section_heading': section_heading,
                         },
                         'old': old_text,
                         'new': new_text,
@@ -292,6 +339,7 @@ def _parse_body(body, changes):
     from lxml import etree
     prev_stable_text = ''
     # ── Section-replace accumulator ───────────────────────────────────────────
     sec_del = []    # fully-deleted elements (CR del block)
@@ -340,6 +388,7 @@ def _parse_body(body, changes):
                 'del_heading': del_heading,
                 'has_del_table': has_del_table,
                 'anchor_text': sec_anchor,
             },
             'elements_xml': elements_xml,
         })
@@ -371,6 +420,7 @@ def _parse_body(body, changes):
                 'location': {
                     'kind': 'body',
                     'anchor_text': prev_stable_text,
                 },
                 'paragraphs': paras,
             })
@@ -380,6 +430,10 @@ def _parse_body(body, changes):
         tag = elem.tag.split('}')[-1] if '}' in elem.tag else elem.tag
         if tag == 'p':
             is_del = _is_deleted_para(elem)
             is_ins = _is_inserted_para(elem)
             is_empty = not _para_orig_text(elem).strip() and not _para_new_text(elem).strip()
@@ -430,13 +484,14 @@ def _parse_body(body, changes):
                         'location': {
                             'kind': 'body_para',
                             'para_context': _para_orig_text(elem).strip(),
                         },
                         'old': old_text,
                         'new': new_text,
                     })
                 orig = _para_orig_text(elem).strip()
-                if orig and not re.fullmatch(r'\[\.[\s\.]*\]', orig):
                     prev_stable_text = orig
         elif tag == 'tbl':
@@ -464,7 +519,8 @@ def _parse_body(body, changes):
                 # Table with inline cell changes
                 flush_section()
                 flush_group()
-                _parse_table(elem, changes, section_heading=prev_stable_text)
     flush_section()
     flush_group()

         return None
     return pStyle.get(qn('w:val'))
# Numbered-heading text: a dotted section number ("5", "5.2", "5.2.1"),
# followed by whitespace and the first non-blank character of the title.
# Group 1 captures the section number.
_HEADING_NUM_RE = re.compile(r'^(\d+(?:\.\d+)*)\s+\S')
# Elision ("skip") markers that stand in for omitted text. Two families are
# accepted so that nothing recognised by the earlier bracket-only pattern
# (r'\[\.[\s\.]*\]') is lost:
#   * a bracketed run of dots, optionally spaced:   [...]  [. . .]  [....]
#   * exactly three dots or one U+2026 ellipsis, bare or wrapped in [] / ():
#     ...  …  [...]  […]  (...)  (…)
# Callers are expected to .strip() the text before matching.
_SKIP_MARKER_RE = re.compile(
    r'^(?:\[\s*\.[\s.]*\]|[\[\(]?\s*(?:\.{3}|…)\s*[\]\)]?)$'
)
def _para_text_with_tabs(p_elem):
    """Return the paragraph's text with every w:tab rendered as '\\t'.

    Intended for heading detection: ETSI headings keep the number and the
    title in separate runs separated by <w:tab/>, which _para_orig_text
    would drop.

    Contributions, in document order:
      - non-empty w:t text, unless some ancestor of the node is a w:ins
      - non-empty w:delText text
      - a '\\t' for each w:tab element
    """
    text_tag = qn('w:t')
    deleted_tag = qn('w:delText')
    tab_tag = qn('w:tab')
    inserted_tag = qn('w:ins')

    def _piece(node):
        # Map one descendant node to the text it contributes ('' for none).
        if node.tag == text_tag and node.text:
            inside_ins = any(anc.tag == inserted_tag
                             for anc in node.iterancestors())
            return '' if inside_ins else node.text
        if node.tag == deleted_tag and node.text:
            return node.text
        if node.tag == tab_tag:
            return '\t'
        return ''

    return ''.join(_piece(node) for node in p_elem.iter())
def _heading_number(p_elem):
    """Return the dotted section number of a numbered heading, else None.

    Two conditions must both hold:
      1. the paragraph style name starts with 'heading' (case-insensitive);
      2. the stripped, tab-aware paragraph text matches _HEADING_NUM_RE.

    The style gate exists to avoid false positives from body paragraphs whose
    text merely starts with a digit — notably bit-description lines such as
    "1 = alphabet set." (style B30) found in Terminal Profile sections.
    """
    style_name = _style_val(p_elem)
    if not (style_name or '').lower().startswith('heading'):
        return None
    match = _HEADING_NUM_RE.match(_para_text_with_tabs(p_elem).strip())
    if match is None:
        return None
    return match.group(1)
def _is_skip_marker(text):
    """Tell whether *text*, once stripped, is only an elision marker.

    The decision is delegated to _SKIP_MARKER_RE; typical markers are
    [...] / […] / ... / … / (...) / (…).
    """
    stripped = text.strip()
    return _SKIP_MARKER_RE.match(stripped) is not None
 def _is_rpr_ins(ins_elem):
     """True if w:ins is inside w:rPr — a formatting change, not a content insertion."""
     p = ins_elem.getparent()
 # ── Table change extraction ───────────────────────────────────────────────────
+def _parse_table(tbl_elem, changes, section_heading='', section_number=''):
     header = _table_header(tbl_elem)
     header_key = header[:3]  # first 3 columns enough for matching
     rows = tbl_elem.findall(qn('w:tr'))
         # ── Tracked row insertion ─────────────────────────────────────────
         if trPr is not None and trPr.find(qn('w:ins')) is not None:
+            # Find preceding stable rows for anchor + context disambiguation
+            stable_before = []
             for prev_idx in range(tr_idx - 1, -1, -1):
                 prev_tr = rows[prev_idx]
                 prev_trPr = prev_tr.find(qn('w:trPr'))
                 if prev_trPr is None or prev_trPr.find(qn('w:ins')) is None:
+                    stable_before.append(_row_col0(prev_tr))
+                    if len(stable_before) >= 3:
+                        break
+            after_anchor = stable_before[0] if stable_before else ''
+            context_rows_before = stable_before[1:]
             cells = []
             for tc in tr.findall(qn('w:tc')):
                     'kind': 'table_row',
                     'table_header': header_key,
                     'after_row_anchor': after_anchor,
+                    'context_rows_before': context_rows_before,
                     'section_heading': section_heading,
+                    'section_number': section_number,
                 },
                 'cells': cells,
             })
                             'row_anchor': row_anchor,
                             'col_idx': col_idx,
                             'section_heading': section_heading,
+                            'section_number': section_number,
                         },
                         'old': old_text,
                         'new': new_text,
     from lxml import etree
     prev_stable_text = ''
+    current_section = ''
     # ── Section-replace accumulator ───────────────────────────────────────────
     sec_del = []    # fully-deleted elements (CR del block)
                 'del_heading': del_heading,
                 'has_del_table': has_del_table,
                 'anchor_text': sec_anchor,
+                'section_number': current_section,
             },
             'elements_xml': elements_xml,
         })
                 'location': {
                     'kind': 'body',
                     'anchor_text': prev_stable_text,
+                    'section_number': current_section,
                 },
                 'paragraphs': paras,
             })
         tag = elem.tag.split('}')[-1] if '}' in elem.tag else elem.tag
         if tag == 'p':
+            hn = _heading_number(elem)
+            if hn:
+                current_section = hn
             is_del = _is_deleted_para(elem)
             is_ins = _is_inserted_para(elem)
             is_empty = not _para_orig_text(elem).strip() and not _para_new_text(elem).strip()
                         'location': {
                             'kind': 'body_para',
                             'para_context': _para_orig_text(elem).strip(),
+                            'section_number': current_section,
                         },
                         'old': old_text,
                         'new': new_text,
                     })
                 orig = _para_orig_text(elem).strip()
+                if orig and not _is_skip_marker(orig):
                     prev_stable_text = orig
         elif tag == 'tbl':
                 # Table with inline cell changes
                 flush_section()
                 flush_group()
+                _parse_table(elem, changes, section_heading=prev_stable_text,
+                             section_number=current_section)
     flush_section()
     flush_group()

scripts/etsi_client.py CHANGED Viewed

@@ -2,8 +2,8 @@
 etsi_client.py — ETSI document download helpers for ApplyCRs.
 Provides:
-  ETSIDocFinder  — CR TDoc downloads via docbox.etsi.org
-  ETSISpecFinder — TS DOCX downloads via portal.etsi.org WKI chain
 """
 import json
@@ -27,7 +27,7 @@ def _get_proxies() -> dict:
     return {"http": proxy, "https": proxy}
-class ETSIDocFinder:
     HEADERS = {
         "User-Agent": (
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
@@ -156,7 +156,7 @@ class ETSIDocFinder:
         )
-class ETSISpecFinder:
     def __init__(self, eol_user: str, eol_password: str):
         self.eol_user = eol_user
         self.eol_password = eol_password
@@ -458,11 +458,26 @@ class ETSISpecFinder:
                 if not versioned_urls:
                     found_names = [u.split("/")[-1] for u in matching_urls]
-                    print(
-                        f"  wki_id={wki_id}: version tag not in filenames {found_names}, "
-                        f"using first spec-matching DOCX as fallback"
-                    )
-                    versioned_urls = matching_urls
                 matching_urls = versioned_urls

 etsi_client.py — ETSI document download helpers for ApplyCRs.
 Provides:
+  CRFetcher  — CR TDoc downloads via docbox.etsi.org
+  TSFetcher  — TS DOCX downloads via portal.etsi.org WKI chain
 """
 import json
     return {"http": proxy, "https": proxy}
+class CRFetcher:
     HEADERS = {
         "User-Agent": (
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
         )
+class TSFetcher:
     def __init__(self, eol_user: str, eol_password: str):
         self.eol_user = eol_user
         self.eol_password = eol_password
                 if not versioned_urls:
                     found_names = [u.split("/")[-1] for u in matching_urls]
+                    # Decode the available version from the first filename (e.g. v160500 → 16.5.0)
+                    avail_ver = None
+                    if found_names:
+                        m = re.search(r'v(\d{6})p?', found_names[0])
+                        if m:
+                            t = m.group(1)
+                            avail_ver = f"{int(t[0:2])}.{int(t[2:4])}.{int(t[4:6])}"
+                    if avail_ver:
+                        print(
+                            f"\n  *** WARNING ***\n"
+                            f"  TS {doc_id} v{version_str} is not available on the ETSI portal.\n"
+                            f"  Portal has v{avail_ver} (file: {found_names[0]}).\n"
+                            f"  Options: target v{avail_ver} in your CR, or drop the TS DOCX manually.\n"
+                        )
+                    else:
+                        print(
+                            f"  wki_id={wki_id}: version tag not in filenames {found_names}, "
+                            f"rejecting (wrong version would be downloaded)"
+                        )
+                    return None
                 matching_urls = versioned_urls

scripts/fetch_crs.py CHANGED Viewed

@@ -20,7 +20,7 @@ import sys
 import zipfile
 from pathlib import Path
-from etsi_client import ETSIDocFinder, ETSISpecFinder
 # ---------------------------------------------------------------------------
@@ -218,10 +218,13 @@ def download_cr(uid: str, cr_dir: Path, eol_user: str, eol_password: str):
     dest = cr_dir / f"{uid}.docx"
     if dest.exists():
         return dest, "already existed"
     try:
-        finder = ETSIDocFinder(eol_user, eol_password)
         url = finder.search_document(uid)
         if isinstance(url, str) and "not found" in url.lower():
             return None, f"document not found: {uid}"
@@ -373,7 +376,7 @@ def download_ts(spec_number: str, version: str, ts_dir: Path,
         return filename, "already existed"
     try:
-        finder = ETSISpecFinder(eol_user, eol_password)
         tmp_path = finder.search_document_docx(spec_number, version)
     except Exception as e:
         return None, f"download error: {e}"
@@ -391,7 +394,7 @@ def download_ts(spec_number: str, version: str, ts_dir: Path,
         dest.unlink()
         return None, f"invalid file (not a ZIP/DOCX, starts with {content[:4]!r})"
-    # Verify the TS contains the expected spec number in its first paragraph
     try:
         import docx as _docx
         _doc = _docx.Document(dest)
@@ -402,6 +405,12 @@ def download_ts(spec_number: str, version: str, ts_dir: Path,
                 f"wrong TS returned: got {first_para[:80]!r} "
                 f"(expected spec {spec_no_space})"
             )
     except Exception:
         pass  # Trust the ZIP check above

 import zipfile
 from pathlib import Path
+from etsi_client import CRFetcher, TSFetcher
 # ---------------------------------------------------------------------------
     dest = cr_dir / f"{uid}.docx"
     if dest.exists():
+        extracted = cr_dir / f"{uid}_extracted.docx"
+        if extracted.exists():
+            return extracted, "already existed"
         return dest, "already existed"
     try:
+        finder = CRFetcher(eol_user, eol_password)
         url = finder.search_document(uid)
         if isinstance(url, str) and "not found" in url.lower():
             return None, f"document not found: {uid}"
         return filename, "already existed"
     try:
+        finder = TSFetcher(eol_user, eol_password)
         tmp_path = finder.search_document_docx(spec_number, version)
     except Exception as e:
         return None, f"download error: {e}"
         dest.unlink()
         return None, f"invalid file (not a ZIP/DOCX, starts with {content[:4]!r})"
+    # Verify the TS contains the expected spec number AND version in its first paragraph
     try:
         import docx as _docx
         _doc = _docx.Document(dest)
                 f"wrong TS returned: got {first_para[:80]!r} "
                 f"(expected spec {spec_no_space})"
             )
+        if f"V{version}" not in first_para:
+            dest.unlink()
+            return None, (
+                f"wrong version returned: got {first_para[:80]!r} "
+                f"(expected V{version})"
+            )
     except Exception:
         pass  # Trust the ZIP check above

scripts/finalize_ts.py CHANGED Viewed

@@ -171,11 +171,12 @@ def _detect_meeting_separator(tbl):
     Returns the detected separator character, defaulting to '#'.
     """
     meet_col = 1  # default: standard ETSI Change History has Meeting in col 1
-    if tbl.rows:
-        for c_idx, cell in enumerate(tbl.rows[0].cells):
-            if any(kw in cell.text.lower() for kw in ('meeting', 'body', 'tsg')):
-                meet_col = c_idx
-                break
     for row in reversed(tbl.rows):
         cells = row.cells
         if len(cells) > meet_col:
@@ -196,25 +197,21 @@ class NoChangeHistoryTable(Exception):
 def find_change_history_table(ts_doc):
     """
-    Scan all tables backward from the end looking for a Change History table.
-    A match requires both:
-      - 8 or 9 columns in the last row (standard ETSI Change History layout)
-      - At least one of the keywords 'cr', 'date', 'meeting', 'rev' in the header row
-    Raises NoChangeHistoryTable (not ValueError) when none is found so callers
-    can distinguish a structural absence from an unexpected error.
     """
-    for tbl in reversed(ts_doc.tables):
-        ncols = len(tbl.rows[-1].cells)
-        if ncols not in (8, 9):
             continue
-        if tbl.rows:
-            header_text = ' '.join(c.text.strip() for c in tbl.rows[0].cells).lower()
-            header_words = set(re.findall(r'\b\w+\b', header_text))
-            if {'cr', 'date'}.issubset(header_words):
-                return tbl
     raise NoChangeHistoryTable(
         'No Change History table found in this document '
-        '(no table with 8 or 9 columns and CR/Date/Meeting/Rev headers)'
     )
@@ -233,7 +230,9 @@ def find_history_table(ts_doc):
 def update_change_history_table(ts_doc, meta, pub_yyyy_mm, old_v, new_v, rev, author, date_str):
     tbl = find_change_history_table(ts_doc)
-    ncols = len(tbl.rows[-1].cells)
     # Detect separator used in existing rows (e.g. '#' in 'SET#115', '-' in 'SET-119')
     # and reformat meeting_id accordingly so it matches the existing style.

     Returns the detected separator character, defaulting to '#'.
     """
     meet_col = 1  # default: standard ETSI Change History has Meeting in col 1
+    # row[0] is the "Change history" title; row[1] is the column header row
+    header_row = tbl.rows[1] if len(tbl.rows) > 1 else tbl.rows[0]
+    for c_idx, cell in enumerate(header_row.cells):
+        if any(kw in cell.text.lower() for kw in ('meeting', 'body', 'tsg')):
+            meet_col = c_idx
+            break
     for row in reversed(tbl.rows):
         cells = row.cells
         if len(cells) > meet_col:
 def find_change_history_table(ts_doc):
     """
     Find the Change History table in *ts_doc*.

     A table qualifies when the text of its first row contains
     "Change history" (the merged title cell that ETSI places at the top of
     the annex table). The comparison ignores letter case and collapses runs
     of whitespace (including non-breaking spaces), so "Change History" or
     "Change\\xa0history" are recognised as well.

     Tables are scanned in document order and the first match is returned.
     Tables without rows are skipped.

     Raises NoChangeHistoryTable when no such table is found.
     """
     for tbl in ts_doc.tables:
         rows = tbl.rows
         if not rows:
             continue
         raw_title = ' '.join(cell.text.strip() for cell in rows[0].cells)
         # str.split() with no argument splits on any Unicode whitespace, so
         # NBSPs and line breaks inside the title cell are normalised too.
         title = ' '.join(raw_title.split()).casefold()
         if 'change history' in title:
             return tbl
     raise NoChangeHistoryTable(
         'No Change History table found in this document '
         '(no table whose first row contains "Change history")'
     )
 def update_change_history_table(ts_doc, meta, pub_yyyy_mm, old_v, new_v, rev, author, date_str):
     tbl = find_change_history_table(ts_doc)
+    # row[0] is the "Change history" title (merged); row[1] is the column header row
+    header_row = tbl.rows[1] if len(tbl.rows) > 1 else tbl.rows[0]
+    ncols = len(header_row.cells)
     # Detect separator used in existing rows (e.g. '#' in 'SET#115', '-' in 'SET-119')
     # and reformat meeting_id accordingly so it matches the existing style.

scripts/orchestrate_cr.py CHANGED Viewed

@@ -169,7 +169,7 @@ def _apply_ts_group(spec_number, version, uids, ts_paths, cr_paths, spec_dir,
         print(f'  Applying {len(combined_manifest)} change(s) to {ts_in.name}...')
         try:
-            n_ok, n_skip, log_lines = apply_manifest(
                 ts_in, combined_manifest, ts_applied, author=author, date=tc_date
             )
         except Exception as e:
@@ -183,7 +183,8 @@ def _apply_ts_group(spec_number, version, uids, ts_paths, cr_paths, spec_dir,
         for line in log_lines:
             if line.strip().startswith('ERROR'):
                 errors.append(line.strip())
-        print(f'  -> Applied: {n_ok}  Skipped: {n_skip}')
         print('  Finalising metadata...')
         ts_final_or_applied = ts_applied  # fallback if finalise raises

         print(f'  Applying {len(combined_manifest)} change(s) to {ts_in.name}...')
         try:
+            n_ok, n_skip, log_lines, n_parsed, n_merged = apply_manifest(
                 ts_in, combined_manifest, ts_applied, author=author, date=tc_date
             )
         except Exception as e:
         for line in log_lines:
             if line.strip().startswith('ERROR'):
                 errors.append(line.strip())
+        print(f'  Parsed: {n_parsed} body changes (merged to {n_merged} groups)'
+              f' → Applied: {n_ok}  Skipped: {n_skip}')
         print('  Finalising metadata...')
         ts_final_or_applied = ts_applied  # fallback if finalise raises

scripts/ts_applicator.py CHANGED Viewed

@@ -51,6 +51,7 @@ _UNICODE_REPLACEMENTS = (
     ('\u2019', "'"),  # right single quote
     ('\u201c', '"'),  # left double quote
     ('\u201d', '"'),  # right double quote
 )
@@ -123,26 +124,50 @@ def _full_para_text(para):
            ''.join(t.text or '' for t in el.findall('.//' + qn('w:delText')))
-def _find_para(doc, search_text, prefer_not_in_table=False):
-    """
-    Find the first paragraph containing search_text.
-    Four levels of matching, in order of confidence:
-      1.0 — exact substring match
-      0.9 — NBSP/dash-normalised match  (_norm)
-      0.8 — whitespace-stripped match   (_norm_ws)  handles tab vs nothing in
-            structured paragraphs (refs '[27]\\t...', abbrevs 'CLT\\t...', headings '8.3\\t...')
-      0.6 — full XML text (including w:del content): handles anchors that were
-            previously deleted by tracked_modify_para in an earlier apply step
-    Returns (para, confidence) or (None, 0.0).
     """
     norm_search = _norm(search_text)
     ws_search = _norm_ws(search_text)
     candidates_exact = []
     candidates_norm = []
     candidates_ws = []
     candidates_del = []
-    for para in doc.paragraphs:
         pt = para.text
         if search_text in pt:
             candidates_exact.append(para)
@@ -151,28 +176,157 @@ def _find_para(doc, search_text, prefer_not_in_table=False):
         elif ws_search and ws_search in _norm_ws(pt):
             candidates_ws.append(para)
         else:
-            # Level 4: check full XML text (catches deleted-but-still-present paragraphs)
-            full_pt = _full_para_text(para)
-            if search_text in full_pt:
-                candidates_del.append(para)
-            elif ws_search and ws_search in _norm_ws(full_pt):
-                candidates_del.append(para)
     def _in_table(para):
         p = para._element
         return any(a.tag == qn('w:tc') for a in p.iterancestors())
     for pool, conf in [(candidates_exact, 1.0), (candidates_norm, 0.9),
-                       (candidates_ws, 0.8), (candidates_del, 0.6)]:
         if not pool:
             continue
-        if prefer_not_in_table:
-            body_only = [p for p in pool if not _in_table(p)]
-            if body_only:
-                return body_only[0], conf
-        return pool[0], conf
-    return None, 0.0
 def _find_table_by_section(doc, section_heading):
@@ -233,11 +387,33 @@ def _find_table(doc, header_key):
     return None, 0.0
-def _find_row(tbl, anchor_text):
     """
     Find first row in tbl where col-0 cell text contains anchor_text.
     Returns (row_idx, confidence) or (-1, 0.0).
     Matching levels, in order of confidence:
       1.0 — exact substring match
       0.9 — Unicode-normalised match   (_norm: xa0, dashes, quotes, …)
@@ -248,38 +424,33 @@ def _find_row(tbl, anchor_text):
       0.5  — clean-prefix + token-overlap: when multiple rows share the prefix,
              pick the one whose col-0 tokens overlap most with the anchor tokens.
     """
     norm_anchor  = _norm(anchor_text)
     ws_anchor    = _norm_ws(anchor_text)
     alnum_anchor = _norm_alnum(anchor_text)
-    best = (-1, 0.0)
-    for idx, row in enumerate(tbl.rows):
-        cell0 = row.cells[0].text if row.cells else ''
-        if anchor_text in cell0:
-            return idx, 1.0
-        if norm_anchor and norm_anchor in _norm(cell0) and best[1] < 0.9:
-            best = (idx, 0.9)
-        elif ws_anchor and ws_anchor in _norm_ws(cell0) and best[1] < 0.8:
-            best = (idx, 0.8)
-        elif alnum_anchor and alnum_anchor in _norm_alnum(cell0) and best[1] < 0.6:
-            best = (idx, 0.6)
-    if best[0] >= 0:
-        return best
     # ── Prefix-based partial match ─────────────────────────────────────────────
-    # The anchor may have Unicode chars embedded mid-text that prevent all string
-    # comparisons above from matching, even after normalisation (e.g. when the CR
-    # extracts '\xa0' between spec-number parts but the TS has different encoding).
-    # Strategy: use only the clean ASCII prefix of the anchor as the search key.
-    # If that prefix is found in exactly one row → we've uniquely identified it.
-    # If it appears in several rows → pick the one whose full token set overlaps
-    # most with the anchor's tokens (the user's described disambiguation rule).
     prefix = _clean_prefix(anchor_text)
     if prefix and len(prefix) > 8:
         prefix_low = prefix.lower()
         hits = [
-            idx for idx, row in enumerate(tbl.rows)
             if row.cells and prefix_low in row.cells[0].text.lower()
         ]
         if len(hits) == 1:
@@ -289,7 +460,7 @@ def _find_row(tbl, anchor_text):
             best_score, best_idx = -1, -1
             for hit_idx in hits:
                 cell_tokens = set(re.findall(r'[a-z0-9]+',
-                                             tbl.rows[hit_idx].cells[0].text.lower()))
                 score = len(anchor_tokens & cell_tokens)
                 if score > best_score:
                     best_score, best_idx = score, hit_idx
@@ -401,29 +572,77 @@ def _apply_section_replace(doc, change, rev, author, date, log):
     loc = change['location']
     del_heading = loc.get('del_heading', '')
     has_del_table = loc.get('has_del_table', False)
     elements_xml = change.get('elements_xml', [])
     if not elements_xml:
         log.append('  SKIP section_replace: no elements in manifest')
         return False
     # ── Find the TS paragraph that matches the deleted heading ─────────────────
     ts_para_elem = None
     if del_heading:
-        for para in doc.paragraphs:
             pt = para.text
             if del_heading in pt or _norm(del_heading) in _norm(pt):
                 ts_para_elem = para._element
                 break
         if ts_para_elem is None:
             # Fallback: include paragraphs whose XML text (inc. del runs) matches
-            for para in doc.paragraphs:
                 if del_heading in _full_para_text(para):
                     ts_para_elem = para._element
                     break
     if ts_para_elem is None:
         log.append(f'  ERROR section_replace: del_heading {del_heading!r} not found in TS')
         return False
     ts_body = ts_para_elem.getparent()
@@ -475,13 +694,16 @@ def _apply_section_replace(doc, change, rev, author, date, log):
                 el.set(qn('w:id'), rev.next())
         cloned.append(cloned_elem)
-    # ── Insert cloned elements before the TS heading paragraph ────────────────
     insert_idx = list(ts_body).index(ts_para_elem)
     for i, elem in enumerate(cloned):
         ts_body.insert(insert_idx + i, elem)
-    # ── Remove the now-replaced TS elements ───────────────────────────────────
-    ts_body.remove(ts_para_elem)
     if ts_tbl_elem is not None:
         ts_body.remove(ts_tbl_elem)
@@ -580,14 +802,24 @@ def _apply_text_replace(doc, change, rev, author, date, log):
     elif loc['kind'] == 'body_para':
         ctx = loc.get('para_context', '')
-        # Try to find the paragraph by old text first
-        para, conf = _find_para(doc, old, prefer_not_in_table=True)
         if para is None:
-            # Fall back: find by paragraph context
-            para, conf = _find_para(doc, ctx, prefer_not_in_table=True)
-            if para is None:
-                log.append(f"  ERROR text_replace: old text {old!r} not found in TS")
-                return False
         if old in para.text:
             tracked_modify_para(para, old, new, rev, author, date)
             log.append(f"  OK  text_replace (body_para conf={conf:.1f}): {old!r} → {new!r}")
@@ -600,14 +832,26 @@ def _apply_text_replace(doc, change, rev, author, date, log):
 def _apply_para_insert(doc, change, rev, author, date, log):
-    anchor_text = change['location'].get('anchor_text', '')
     paras_data = change.get('paragraphs', [])
     if not paras_data:
         return True
-    anchor_para, conf = _find_para(doc, anchor_text)
     if anchor_para is None:
-        log.append(f"  ERROR para_insert: anchor not found {anchor_text[:60]!r}")
         return False
     items = [(p['text'], p['style'] or 'Normal') for p in paras_data]
@@ -633,7 +877,8 @@ def _apply_row_insert(doc, change, rev, author, date, log, last_inserted=None):
             return False
     after_anchor = loc.get('after_row_anchor', '')
-    row_idx, r_conf = _find_row(tbl, after_anchor)
     if row_idx < 0:
         log.append(f"  ERROR row_insert: anchor row not found {after_anchor!r}")
         return False
@@ -697,7 +942,7 @@ def _merge_para_inserts(manifest):
 def apply_manifest(ts_path, manifest, out_path, author=DEFAULT_AUTHOR, date=DEFAULT_DATE):
     """
     Apply all changes in manifest to ts_path, save to out_path.
-    Returns (n_ok, n_skipped, log_lines).
     """
     doc = docx.Document(str(ts_path))
     rev = RevCounter(doc)
@@ -705,7 +950,9 @@ def apply_manifest(ts_path, manifest, out_path, author=DEFAULT_AUTHOR, date=DEFA
     n_ok = 0
     n_skip = 0
     manifest = _merge_para_inserts(manifest)
     # Track last inserted <w:tr> per (tbl_id, anchor_row_idx) to maintain
     # forward insertion order when multiple row_inserts target the same anchor.
@@ -732,7 +979,7 @@ def apply_manifest(ts_path, manifest, out_path, author=DEFAULT_AUTHOR, date=DEFA
             n_skip += 1
     doc.save(str(out_path))
-    return n_ok, n_skip, log
 # ── CLI ───────────────────────────────────────────────────────────────────────
@@ -752,11 +999,11 @@ def main():
         manifest = json.load(f)
     print(f'Applying {len(manifest)} change(s) from manifest to {ts_path.name}...')
-    n_ok, n_skip, log = apply_manifest(ts_path, manifest, out_path, author=args.author)
     for line in log:
         print(line)
-    print(f'\nResult: {n_ok} applied, {n_skip} skipped')
     print(f'Output: {out_path}')

     ('\u2019', "'"),  # right single quote
     ('\u201c', '"'),  # left double quote
     ('\u201d', '"'),  # right double quote
+    ('\u2026', '...'),  # horizontal ellipsis → three dots
 )
            ''.join(t.text or '' for t in el.findall('.//' + qn('w:delText')))
+def _original_para_text(para):
+    """Rebuild the paragraph's text as it stood before tracked changes.
+    The paragraph XML is walked in document order and the text of these
+    elements is concatenated:
+      - w:t elements with no w:ins ancestor inside the paragraph  (untouched text)
+      - w:delText elements                                         (tracked deletions)
+    Text of w:t elements nested in a w:ins (tracked insertions) is dropped.
+    Anchors quoting the original wording (e.g. 'SCP81Connection') therefore keep
+    matching after a tracked '1'→'X' replacement has been applied to the
+    paragraph, a case where _full_para_text returns the pieces out of order.
     """
+    root = para._element
+    text_tag = qn('w:t')
+    deleted_tag = qn('w:delText')
+    inserted_tag = qn('w:ins')
+    def _inside_insertion(node):
+        # Climb towards the paragraph element and stop as soon as it is reached,
+        # so w:ins wrappers outside this paragraph are never considered.
+        for ancestor in node.iterancestors():
+            if ancestor is root:
+                return False
+            if ancestor.tag == inserted_tag:
+                return True
+        return False
+    pieces = []
+    for node in root.iter():
+        tag = node.tag
+        if tag == deleted_tag or (tag == text_tag and not _inside_insertion(node)):
+            pieces.append(node.text or '')
+    return ''.join(pieces)
+def _match_paragraphs(paragraphs, search_text, prefer_not_in_table=False):
+    """Core 5-tier matching logic. Operates on any iterable of Paragraph objects.
+    Returns (para, confidence) or (None, 0.0)."""
     norm_search = _norm(search_text)
     ws_search = _norm_ws(search_text)
     candidates_exact = []
     candidates_norm = []
     candidates_ws = []
+    candidates_orig = []
     candidates_del = []
+    for para in paragraphs:
         pt = para.text
         if search_text in pt:
             candidates_exact.append(para)
         elif ws_search and ws_search in _norm_ws(pt):
             candidates_ws.append(para)
         else:
+            orig_pt = _original_para_text(para)
+            if (search_text in orig_pt
+                    or (norm_search and norm_search in _norm(orig_pt))):
+                candidates_orig.append(para)
+            elif ws_search and ws_search in _norm_ws(orig_pt):
+                candidates_orig.append(para)
+            else:
+                full_pt = _full_para_text(para)
+                if search_text in full_pt:
+                    candidates_del.append(para)
+                elif ws_search and ws_search in _norm_ws(full_pt):
+                    candidates_del.append(para)
     def _in_table(para):
         p = para._element
         return any(a.tag == qn('w:tc') for a in p.iterancestors())
+    if not prefer_not_in_table:
+        for pool, conf in [(candidates_exact, 1.0), (candidates_norm, 0.9),
+                           (candidates_ws, 0.8), (candidates_orig, 0.7),
+                           (candidates_del, 0.6)]:
+            if pool:
+                return pool[0], conf
+        return None, 0.0
+    best_table_match = (None, 0.0)
     for pool, conf in [(candidates_exact, 1.0), (candidates_norm, 0.9),
+                       (candidates_ws, 0.8), (candidates_orig, 0.7),
+                       (candidates_del, 0.6)]:
         if not pool:
             continue
+        body_only = [p for p in pool if not _in_table(p)]
+        if body_only:
+            return body_only[0], conf
+        if best_table_match[0] is None:
+            best_table_match = (pool[0], conf)
+    return best_table_match if best_table_match[0] is not None else (None, 0.0)
+def _find_para(doc, search_text, prefer_not_in_table=False):
+    """Document-wide anchor lookup.
+    Hands every paragraph of `doc` to _match_paragraphs (five-tier matching)
+    and returns its (para, confidence) result unchanged."""
+    every_para = doc.paragraphs
+    return _match_paragraphs(every_para, search_text,
+                             prefer_not_in_table=prefer_not_in_table)
+# ── Section-aware anchor search ───────────────────────────────────────────────
+# Dotted section number ("4", "4.2", "4.2.1", ...) followed by whitespace and a title.
+_HEADING_NUM_RE = re.compile(r'^(\d+(?:\.\d+)*)\s+\S')
+def _para_heading_number(para):
+    """Return the dotted section number of a heading paragraph, or None.
+    A paragraph qualifies only when its style name starts with 'Heading'
+    (compared case-insensitively) AND its stripped text starts with a dotted
+    number followed by a title. The style check filters out look-alikes such as
+    TOC entries (style 'toc N'), front-matter address lines (style 'FP') and
+    change-history labels (style 'B3'); ETSI/3GPP TS documents style their real
+    headings as 'Heading 1'..'Heading N'."""
+    style = para.style
+    name = style.name if style is not None else ''
+    if not (name or '').lower().startswith('heading'):
+        return None
+    match = _HEADING_NUM_RE.match(para.text.strip())
+    if match is None:
+        return None
+    return match.group(1)
+def _is_descendant_section(child, parent):
+    """Tell whether section number `child` equals `parent` or sits below it.
+    Nesting is decided on the dotted prefix, so '4.2.1' is under '4.2' while
+    '4.20' is not."""
+    if child == parent:
+        return True
+    return child.startswith(f'{parent}.')
+def _section_range(doc, target):
+    """Locate the slice of doc.paragraphs that belongs to section `target`.
+    Returns (start_idx, end_idx) where
+      start_idx = index of the first heading whose number equals `target`
+      end_idx   = index of the next heading that is neither `target` nor one of
+                  its dotted descendants, or len(doc.paragraphs) when the
+                  section runs to the end of the document.
+    Returns (None, None) when no heading carries `target`. Nothing is cached:
+    the paragraph list is scanned afresh on every call."""
+    paras = doc.paragraphs
+    begin = None
+    for idx, paragraph in enumerate(paras):
+        number = _para_heading_number(paragraph)
+        if number is None:
+            # not a numbered heading — cannot open or close a section
+            continue
+        if begin is None:
+            if number == target:
+                begin = idx
+            continue
+        if not _is_descendant_section(number, target):
+            return (begin, idx)
+    if begin is None:
+        return (None, None)
+    return (begin, len(paras))
+def _enclosing_heading(doc, para):
+    """Find the heading paragraph that encloses `para`.
+    `para` is located in doc.paragraphs by XML element identity, then the list
+    is scanned backwards starting at `para` itself, so a heading paragraph is
+    its own enclosing heading. Returns the heading Paragraph, or None when
+    `para` is not in doc.paragraphs or no heading is found. Used for HINT lines."""
+    paras = doc.paragraphs
+    wanted = para._element
+    position = next(
+        (idx for idx, candidate in enumerate(paras) if candidate._element is wanted),
+        None)
+    if position is None:
+        return None
+    for candidate in reversed(paras[:position + 1]):
+        if _para_heading_number(candidate) is not None:
+            return candidate
+    return None
+def _find_para_in_section(doc, search_text, section_number, prefer_not_in_table=False):
+    """Anchor lookup limited to the paragraphs of one section.
+    Returns (para, conf, status):
+      status == 'no_section' — `section_number` is empty or has no heading in
+                               the document; para is None and conf is 0.0. The
+                               caller is expected to fall back to the global
+                               _find_para and log a WARN line.
+      status == 'in_section' — the section exists; (para, conf) is whatever
+                               _match_paragraphs found inside it, which may
+                               still be (None, 0.0)."""
+    first, last = _section_range(doc, section_number) if section_number else (None, None)
+    if first is None:
+        return (None, 0.0, 'no_section')
+    section_paras = doc.paragraphs[first:last]
+    match, score = _match_paragraphs(section_paras, search_text, prefer_not_in_table)
+    return (match, score, 'in_section')
+def _find_para_with_section(doc, search_text, section_number, kind_label, log,
+                             prefer_not_in_table=False):
+    """Resolve an anchor paragraph, honouring the declared section when possible.
+    Outcomes (messages are appended to `log`, a list of str):
+      * no section_number                      → WARN, result of global _find_para.
+      * section_number has no heading in TS    → WARN, result of global _find_para.
+      * section exists and contains the anchor → (para, conf), nothing logged.
+      * section exists, anchor not inside it   → ERROR (plus a HINT when the anchor
+        is found elsewhere in the document), returns (None, 0.0).
+    `kind_label` only prefixes the ERROR lines."""
+    if not section_number:
+        log.append(f"  WARN no section_number on change — global anchor search for {search_text[:60]!r}")
+        return _find_para(doc, search_text, prefer_not_in_table)
+    para, conf, status = _find_para_in_section(
+        doc, search_text, section_number, prefer_not_in_table)
+    if status == 'in_section' and para is not None:
+        return para, conf
+    if status == 'no_section':
+        log.append(f"  WARN section '{section_number}' not found in TS — falling back to global search")
+        return _find_para(doc, search_text, prefer_not_in_table)
+    # The section exists but does not hold the anchor: look document-wide only
+    # to tell the user where the text actually lives.
+    elsewhere, _ = _find_para(doc, search_text, prefer_not_in_table)
+    if elsewhere is None:
+        log.append(f"  ERROR {kind_label}: anchor {search_text[:60]!r} not found in section "
+                   f"{section_number} (or anywhere)")
+        return None, 0.0
+    heading = _enclosing_heading(doc, elsewhere)
+    actual = '?' if heading is None else _para_heading_number(heading)
+    log.append(f"  ERROR {kind_label}: anchor {search_text[:60]!r} declared in section "
+               f"{section_number} but found in section {actual}")
+    log.append(f"    HINT nearest match: {elsewhere.text[:120]!r}")
+    return None, 0.0
 def _find_table_by_section(doc, section_heading):
     return None, 0.0
+def _disambiguate_by_context(all_rows, candidates, context_rows_before):
+    """Pick the candidate row whose preceding rows best match the expected context.
+    all_rows: sequence of table rows; each row exposes `.cells`, each cell `.text`.
+    candidates: non-empty list of indices into all_rows that matched the anchor.
+    context_rows_before: expected col-0 texts of the rows above the anchor,
+        closest-first (entry 0 is the row directly above).
+    Returns the candidate index with the most matching context rows. On a tie the
+    earliest tied entry of `candidates` wins, which is candidates[0] when no
+    context row matches at all.
+    An expected text that is empty, or that normalises to an empty string, never
+    counts as a match: '' is a substring of every cell and would otherwise raise
+    every candidate's score equally. This mirrors the non-empty guards _find_row
+    applies to its own needles."""
+    # Normalise each expected text once instead of once per candidate.
+    needles = [
+        (depth, _norm(expected), _norm_ws(expected))
+        for depth, expected in enumerate(context_rows_before, start=1)
+        if expected
+    ]
+    best_score, best_idx = -1, candidates[0]
+    for idx in candidates:
+        score = 0
+        for depth, norm_expected, ws_expected in needles:
+            ctx_idx = idx - depth
+            if ctx_idx < 0:
+                # context would lie above the first table row
+                continue
+            cells = all_rows[ctx_idx].cells
+            cell0 = cells[0].text if cells else ''
+            if ((norm_expected and norm_expected in _norm(cell0))
+                    or (ws_expected and ws_expected in _norm_ws(cell0))):
+                score += 1
+        # strict '>' keeps the earliest candidate among equal scores
+        if score > best_score:
+            best_score, best_idx = score, idx
+    return best_idx
+def _find_row(tbl, anchor_text, context_rows_before=None):
     """
     Find first row in tbl where col-0 cell text contains anchor_text.
     Returns (row_idx, confidence) or (-1, 0.0).
+    When context_rows_before is provided and multiple rows match, uses the
+    col-0 texts of the rows preceding each candidate to disambiguate.
     Matching levels, in order of confidence:
       1.0 — exact substring match
       0.9 — Unicode-normalised match   (_norm: xa0, dashes, quotes, …)
       0.5  — clean-prefix + token-overlap: when multiple rows share the prefix,
              pick the one whose col-0 tokens overlap most with the anchor tokens.
     """
+    all_rows = list(tbl.rows)
     norm_anchor  = _norm(anchor_text)
     ws_anchor    = _norm_ws(anchor_text)
     alnum_anchor = _norm_alnum(anchor_text)
+    for match_fn, conf in [
+        (lambda c: anchor_text in c,                                      1.0),
+        (lambda c: bool(norm_anchor) and norm_anchor in _norm(c),         0.9),
+        (lambda c: bool(ws_anchor) and ws_anchor in _norm_ws(c),          0.8),
+        (lambda c: bool(alnum_anchor) and alnum_anchor in _norm_alnum(c), 0.6),
+    ]:
+        candidates = [
+            idx for idx, row in enumerate(all_rows)
+            if row.cells and match_fn(row.cells[0].text)
+        ]
+        if not candidates:
+            continue
+        if len(candidates) == 1 or not context_rows_before:
+            return candidates[0], conf
+        return _disambiguate_by_context(all_rows, candidates, context_rows_before), conf
     # ── Prefix-based partial match ─────────────────────────────────────────────
     prefix = _clean_prefix(anchor_text)
     if prefix and len(prefix) > 8:
         prefix_low = prefix.lower()
         hits = [
+            idx for idx, row in enumerate(all_rows)
             if row.cells and prefix_low in row.cells[0].text.lower()
         ]
         if len(hits) == 1:
             best_score, best_idx = -1, -1
             for hit_idx in hits:
                 cell_tokens = set(re.findall(r'[a-z0-9]+',
+                                             all_rows[hit_idx].cells[0].text.lower()))
                 score = len(anchor_tokens & cell_tokens)
                 if score > best_score:
                     best_score, best_idx = score, hit_idx
     loc = change['location']
     del_heading = loc.get('del_heading', '')
     has_del_table = loc.get('has_del_table', False)
+    section_number = loc.get('section_number', '')
     elements_xml = change.get('elements_xml', [])
     if not elements_xml:
         log.append('  SKIP section_replace: no elements in manifest')
         return False
+    # ── Resolve search scope: restrict to declared section if possible ─────────
+    search_paras = doc.paragraphs
+    section_status = 'no_section_required'
+    if section_number:
+        start, end = _section_range(doc, section_number)
+        if start is not None:
+            search_paras = doc.paragraphs[start:end]
+            section_status = 'in_section'
+        else:
+            log.append(f"  WARN section '{section_number}' not found in TS — falling back to global search")
+            section_status = 'section_not_in_ts'
+    else:
+        log.append("  WARN no section_number on section_replace — global search")
     # ── Find the TS paragraph that matches the deleted heading ─────────────────
     ts_para_elem = None
+    insert_after_anchor = False  # when True: insert after anchor, don't delete it
     if del_heading:
+        for para in search_paras:
             pt = para.text
             if del_heading in pt or _norm(del_heading) in _norm(pt):
                 ts_para_elem = para._element
                 break
         if ts_para_elem is None:
             # Fallback: include paragraphs whose XML text (inc. del runs) matches
+            for para in search_paras:
                 if del_heading in _full_para_text(para):
                     ts_para_elem = para._element
                     break
+    else:
+        # No heading to delete — use anchor_text to find insertion point
+        anchor_text = loc.get('anchor_text', '')
+        if anchor_text:
+            if section_status == 'in_section':
+                anchor_para, _, _ = _find_para_in_section(
+                    doc, anchor_text, section_number)
+            else:
+                anchor_para, _ = _find_para(doc, anchor_text)
+            if anchor_para is not None:
+                ts_para_elem = anchor_para._element
+                insert_after_anchor = True
     if ts_para_elem is None:
+        # Section mismatch check: if declared section exists, but del_heading
+        # is found GLOBALLY in a different section, report that.
+        if section_status == 'in_section' and del_heading:
+            for para in doc.paragraphs:
+                pt = para.text
+                if del_heading in pt or del_heading in _full_para_text(para):
+                    enc = _enclosing_heading(doc, para)
+                    actual = _para_heading_number(enc) if enc is not None else '?'
+                    log.append(f'  ERROR section_replace: del_heading {del_heading!r} declared in section '
+                               f'{section_number} but found in section {actual}')
+                    log.append(f"    HINT nearest match: {para.text[:120]!r}")
+                    return False
         log.append(f'  ERROR section_replace: del_heading {del_heading!r} not found in TS')
+        tokens = del_heading.split()[:3] if del_heading else []
+        if tokens:
+            _hints = sorted(
+                [p for p in doc.paragraphs if any(tok in p.text for tok in tokens)],
+                key=lambda p: -len(set(del_heading.split()) & set(p.text.split()))
+            )[:3]
+            for _h in _hints:
+                log.append(f"    HINT nearest match: {_h.text[:120]!r}")
         return False
     ts_body = ts_para_elem.getparent()
                 el.set(qn('w:id'), rev.next())
         cloned.append(cloned_elem)
+    # ── Insert cloned elements before (or after) the anchor paragraph ─────────
     insert_idx = list(ts_body).index(ts_para_elem)
+    if insert_after_anchor:
+        insert_idx += 1  # insert after anchor, not before it
     for i, elem in enumerate(cloned):
         ts_body.insert(insert_idx + i, elem)
+    # ── Remove the now-replaced TS elements (only when a heading was deleted) ──
+    if not insert_after_anchor:
+        ts_body.remove(ts_para_elem)
     if ts_tbl_elem is not None:
         ts_body.remove(ts_tbl_elem)
     elif loc['kind'] == 'body_para':
         ctx = loc.get('para_context', '')
+        section_number = loc.get('section_number', '')
+        if len(old) < 4 and ctx:
+            # Short old text matches too broadly (e.g. a single digit would hit
+            # the title paragraph).  Locate by context first, then verify old
+            # text is present in that paragraph.
+            para, conf = _find_para_with_section(
+                doc, ctx, section_number, 'text_replace', log, prefer_not_in_table=True)
+            if para is None or old not in para.text:
+                para = None
+        else:
+            para, conf = _find_para_with_section(
+                doc, old, section_number, 'text_replace', log, prefer_not_in_table=True)
+            if para is None and ctx:
+                para, conf = _find_para_with_section(
+                    doc, ctx, section_number, 'text_replace', log, prefer_not_in_table=True)
         if para is None:
+            log.append(f"  ERROR text_replace: old text {old!r} not found in TS")
+            return False
         if old in para.text:
             tracked_modify_para(para, old, new, rev, author, date)
             log.append(f"  OK  text_replace (body_para conf={conf:.1f}): {old!r} → {new!r}")
 def _apply_para_insert(doc, change, rev, author, date, log):
+    loc = change['location']
+    anchor_text = loc.get('anchor_text', '')
+    section_number = loc.get('section_number', '')
     paras_data = change.get('paragraphs', [])
     if not paras_data:
         return True
+    anchor_para, conf = _find_para_with_section(
+        doc, anchor_text, section_number, 'para_insert', log)
     if anchor_para is None:
+        # When no section_number context, emit the legacy ERROR + HINT lines
+        if not section_number:
+            log.append(f"  ERROR para_insert: anchor not found {anchor_text[:60]!r}")
+            tokens = anchor_text.split()[:3]
+            _hints = sorted(
+                [p for p in doc.paragraphs if any(tok in p.text for tok in tokens)],
+                key=lambda p: -len(set(anchor_text.split()) & set(p.text.split()))
+            )[:3]
+            for _h in _hints:
+                log.append(f"    HINT nearest match: {_h.text[:120]!r}")
         return False
     items = [(p['text'], p['style'] or 'Normal') for p in paras_data]
             return False
     after_anchor = loc.get('after_row_anchor', '')
+    context_rows_before = loc.get('context_rows_before', [])
+    row_idx, r_conf = _find_row(tbl, after_anchor, context_rows_before)
     if row_idx < 0:
         log.append(f"  ERROR row_insert: anchor row not found {after_anchor!r}")
         return False
 def apply_manifest(ts_path, manifest, out_path, author=DEFAULT_AUTHOR, date=DEFAULT_DATE):
     """
     Apply all changes in manifest to ts_path, save to out_path.
+    Returns (n_ok, n_skipped, log_lines, n_parsed, n_merged_groups).
     """
     doc = docx.Document(str(ts_path))
     rev = RevCounter(doc)
     n_ok = 0
     n_skip = 0
+    n_parsed = len(manifest)
     manifest = _merge_para_inserts(manifest)
+    n_merged = len(manifest)
     # Track last inserted <w:tr> per (tbl_id, anchor_row_idx) to maintain
     # forward insertion order when multiple row_inserts target the same anchor.
             n_skip += 1
     doc.save(str(out_path))
+    return n_ok, n_skip, log, n_parsed, n_merged
 # ── CLI ───────────────────────────────────────────────────────────────────────
         manifest = json.load(f)
     print(f'Applying {len(manifest)} change(s) from manifest to {ts_path.name}...')
+    n_ok, n_skip, log, n_parsed, n_merged = apply_manifest(ts_path, manifest, out_path, author=args.author)
     for line in log:
         print(line)
+    print(f'\nParsed: {n_parsed} body changes (merged to {n_merged} groups) → Applied: {n_ok}  Skipped: {n_skip}')
     print(f'Output: {out_path}')