cstr committed on
Commit
f345d03
·
verified ·
1 Parent(s): 0b206b1

Update format_transplant.py

Browse files
Files changed (1) hide show
  1. format_transplant.py +52 -29
format_transplant.py CHANGED
@@ -748,15 +748,25 @@ class BlueprintAnalyzer:
748
 
749
  # ── Separator after marker ────────────────────────────
750
  # A separator run is one whose ENTIRE text content is
751
- # whitespace (tab, space, or empty). If the next run has
752
- # actual content, this footnote has no dedicated separator
753
- # run — skip it and try the next footnote.
754
  if not sep_found:
755
  if ri + 1 < len(runs):
756
  next_r = runs[ri + 1]
 
757
  t_elems = next_r.findall(qn("w:t"))
758
  sep_text = "".join(t.text or "" for t in t_elems)
759
- if sep_text.strip() == "":
 
 
 
 
 
 
 
 
 
760
  # Pure whitespace → this IS the separator run
761
  schema.footnote_separator = sep_text
762
  sep_found = True
@@ -1856,12 +1866,12 @@ class DocumentBuilder:
1856
  def _normalize_fn_separator(self, p_elem: Any) -> None:
1857
  """
1858
  Ensure the run immediately after <w:footnoteRef> carries the same
1859
- separator text as the blueprint's footnotes (tab, space, or nothing).
1860
 
1861
  Three cases handled:
1862
- • Separator run exists, text matches → no-op
1863
- • Separator run exists, text differs → replace its text content
1864
- • No run after marker, blueprint wants one → insert a bare run with the text
1865
  Only acts when schema.footnote_separator was successfully read from the blueprint.
1866
  """
1867
  wanted = self.schema.footnote_separator
@@ -1873,11 +1883,14 @@ class DocumentBuilder:
1873
 
1874
  def _make_sep_run(text: str):
1875
  sep_r = OxmlElement("w:r")
1876
- t_elem = OxmlElement("w:t")
1877
- t_elem.text = text
1878
- if " " in text or "\t" in text:
1879
- t_elem.set(_XML_SPACE_ATTR, "preserve")
1880
- sep_r.append(t_elem)
 
 
 
1881
  return sep_r
1882
 
1883
  for ri, r_elem in enumerate(runs):
@@ -1886,34 +1899,44 @@ class DocumentBuilder:
1886
 
1887
  if ri + 1 < len(runs):
1888
  next_r = runs[ri + 1]
 
1889
  t_elems = next_r.findall(qn("w:t"))
1890
- current = "".join(t.text or "" for t in t_elems)
1891
- is_sep_run = current.strip() == "" # purely whitespace = separator run
 
 
1892
 
1893
  if is_sep_run:
 
 
 
 
1894
  if wanted == "":
1895
- # Blueprint has no separator — clear the whitespace run
1896
- for t in t_elems:
1897
- t.text = ""
 
1898
  logger.debug("[BUILD] Footnote separator cleared")
1899
- elif current != wanted:
1900
- # Replace whitespace content with the blueprint's separator
1901
- if t_elems:
1902
- t_elems[0].text = wanted
1903
- if " " in wanted or "\t" in wanted:
1904
- t_elems[0].set(_XML_SPACE_ATTR, "preserve")
1905
- for t in t_elems[1:]:
1906
- t.text = ""
1907
  else:
1908
  t_elem = OxmlElement("w:t")
1909
  t_elem.text = wanted
1910
- if " " in wanted or "\t" in wanted:
1911
  t_elem.set(_XML_SPACE_ATTR, "preserve")
1912
  next_r.append(t_elem)
1913
  logger.debug(
1914
- "[BUILD] Footnote separator: %r → %r", current, wanted
 
 
1915
  )
1916
- # else: already matches — no-op
1917
  else:
1918
  # Next run is actual footnote text, not a separator run.
1919
  if wanted:
 
748
 
749
  # ── Separator after marker ────────────────────────────
750
  # A separator run is one whose ENTIRE text content is
751
+ # whitespace (tab, space, or empty) OR contains a <w:tab/>.
752
+ # If the next run has actual content, this footnote has no
753
+ # dedicated separator run — skip it and try the next footnote.
754
  if not sep_found:
755
  if ri + 1 < len(runs):
756
  next_r = runs[ri + 1]
757
+ has_tab = next_r.find(qn("w:tab")) is not None
758
  t_elems = next_r.findall(qn("w:t"))
759
  sep_text = "".join(t.text or "" for t in t_elems)
760
+
761
+ if has_tab:
762
+ # Prioritize physical tab element over text
763
+ schema.footnote_separator = "\t"
764
+ sep_found = True
765
+ logger.debug(
766
+ "[BLUEPRINT] Footnote separator: <w:tab/> (fn id=%d)",
767
+ fn_id,
768
+ )
769
+ elif sep_text.strip() == "":
770
  # Pure whitespace → this IS the separator run
771
  schema.footnote_separator = sep_text
772
  sep_found = True
 
1866
  def _normalize_fn_separator(self, p_elem: Any) -> None:
1867
  """
1868
  Ensure the run immediately after <w:footnoteRef> carries the same
1869
+ separator text or tab element as the blueprint's footnotes.
1870
 
1871
  Three cases handled:
1872
+ • Separator run exists, content matches → no-op
1873
+ • Separator run exists, content differs → replace its content
1874
+ • No run after marker, blueprint wants one → insert a new run
1875
  Only acts when schema.footnote_separator was successfully read from the blueprint.
1876
  """
1877
  wanted = self.schema.footnote_separator
 
1883
 
1884
  def _make_sep_run(text: str):
1885
  sep_r = OxmlElement("w:r")
1886
+ if text == "\t":
1887
+ sep_r.append(OxmlElement("w:tab"))
1888
+ else:
1889
+ t_elem = OxmlElement("w:t")
1890
+ t_elem.text = text
1891
+ if " " in text:
1892
+ t_elem.set(_XML_SPACE_ATTR, "preserve")
1893
+ sep_r.append(t_elem)
1894
  return sep_r
1895
 
1896
  for ri, r_elem in enumerate(runs):
 
1899
 
1900
  if ri + 1 < len(runs):
1901
  next_r = runs[ri + 1]
1902
+ has_tab = next_r.find(qn("w:tab")) is not None
1903
  t_elems = next_r.findall(qn("w:t"))
1904
+ current_text = "".join(t.text or "" for t in t_elems)
1905
+
1906
+ # A run is a separator run if it has a tab OR is purely whitespace text
1907
+ is_sep_run = has_tab or current_text.strip() == ""
1908
 
1909
  if is_sep_run:
1910
+ # Decide if current content matches 'wanted'
1911
+ # (Note: we treat any existing tab element as equivalent to wanted="\t")
1912
+ matches = (has_tab and wanted == "\t") or (not has_tab and current_text == wanted)
1913
+
1914
  if wanted == "":
1915
+ # Blueprint has no separator — clear the run's content
1916
+ for child in list(next_r):
1917
+ if child.tag in (qn("w:t"), qn("w:tab")):
1918
+ next_r.remove(child)
1919
  logger.debug("[BUILD] Footnote separator cleared")
1920
+ elif not matches:
1921
+ # Replace all existing content with the blueprint's separator
1922
+ for child in list(next_r):
1923
+ if child.tag in (qn("w:t"), qn("w:tab")):
1924
+ next_r.remove(child)
1925
+
1926
+ if wanted == "\t":
1927
+ next_r.append(OxmlElement("w:tab"))
1928
  else:
1929
  t_elem = OxmlElement("w:t")
1930
  t_elem.text = wanted
1931
+ if " " in wanted:
1932
  t_elem.set(_XML_SPACE_ATTR, "preserve")
1933
  next_r.append(t_elem)
1934
  logger.debug(
1935
+ "[BUILD] Footnote separator: %r → %r",
1936
+ ("<w:tab/>" if has_tab else current_text),
1937
+ wanted
1938
  )
1939
+ # else: matches — no-op
1940
  else:
1941
  # Next run is actual footnote text, not a separator run.
1942
  if wanted: