Spaces:
Paused
Paused
Upload bilingual_book_maker/book_maker/backmatter.py with huggingface_hub
Browse files
bilingual_book_maker/book_maker/backmatter.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations

import re
import unicodedata
from collections.abc import Iterable
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Section headings treated as back matter when the caller supplies no usable
# keyword list. Entries are stored lowercase with single spaces so they can
# be compared directly against normalized headings.
DEFAULT_BACKMATTER_TITLES = (
    # Author information.
    "about the author",
    "notes about the author",
    "notes on the author",
    "biography",
    "biographical note",
    # Credits and legal notices.
    "credits",
    "acknowledgments",
    "acknowledgements",  # alternate spelling of the entry above
    "copyright",
    # Reference sections.
    "bibliography",
    "index",
    "endnotes",
    "notes",
    "further reading",
    # Other works and edition pages.
    "also by the author",
    "about this book",
    "about this ebook",
)
|
| 26 |
+
|
| 27 |
+
# Runs of characters that are neither letters nor digits in any script.
# ``\W`` on its own would keep the underscore, so it is listed explicitly.
NON_ALNUM_RE = re.compile(r"[\W_]+")


def normalize_heading(text: str | None) -> str:
    """Reduce a heading to a canonical form suitable for comparison.

    The text is NFKC-normalized and case-folded, every run of punctuation,
    symbols, underscores or whitespace becomes a single space, and leading
    and trailing spaces are removed. Letters and digits of every script are
    kept, so non-English headings survive normalization instead of collapsing
    to an empty string.

    Args:
        text: Raw heading text; ``None`` is treated as empty.

    Returns:
        The normalized heading, or ``""`` when nothing comparable remains.
    """
    if not text:
        return ""
    # NFKC composes decomposed accents and folds compatibility forms (such as
    # full-width letters) so visually identical headings compare equal.
    # Combining marks that have no composed form still count as separators.
    folded = unicodedata.normalize("NFKC", text).casefold()
    return " ".join(NON_ALNUM_RE.sub(" ", folded).split())
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def split_backmatter_titles(value: str | Iterable[str] | None) -> tuple[str, ...]:
    """Turn a user-supplied keyword specification into normalized headings.

    Args:
        value: A comma-separated string, an iterable of heading strings, or
            ``None``.

    Returns:
        A tuple of normalized, non-empty headings. ``DEFAULT_BACKMATTER_TITLES``
        is returned when ``value`` is ``None`` or yields no usable heading.
    """
    if value is None:
        return DEFAULT_BACKMATTER_TITLES

    # A plain string is a comma-separated list; anything else is iterated as is.
    raw_items = value.split(",") if isinstance(value, str) else value
    headings = tuple(
        heading for heading in map(normalize_heading, raw_items) if heading
    )
    # Entries that normalize to nothing are dropped; an empty result means the
    # caller effectively supplied no keywords.
    return headings if headings else DEFAULT_BACKMATTER_TITLES
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def matches_backmatter_title(title: str | None, keywords: str | Iterable[str] | None = None) -> bool:
    """Report whether a section title names a back-matter section.

    A title matches when its normalized form equals a keyword, or begins with
    a keyword followed by a space (a whole-word prefix).

    Args:
        title: Section title to test; ``None`` or blank never matches.
        keywords: Keyword specification passed to ``split_backmatter_titles``.

    Returns:
        ``True`` if the title matches any keyword, otherwise ``False``.
    """
    heading = normalize_heading(title)
    if not heading:
        return False

    for candidate in split_backmatter_titles(keywords):
        # The trailing space keeps "notes" from matching "notesheet".
        if heading == candidate or heading.startswith(candidate + " "):
            return True
    return False
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def should_skip_backmatter(
    *,
    title: str | None,
    current_percent: float,
    skip_after_percent: int | float,
    keywords: str | Iterable[str] | None = None,
) -> bool:
    """Decide whether a section should be skipped as back matter.

    Args:
        title: Title of the section under consideration.
        current_percent: Progress value compared against the threshold.
        skip_after_percent: Threshold; at or below it nothing is skipped.
        keywords: Keyword specification passed to ``matches_backmatter_title``.

    Returns:
        ``True`` only when ``current_percent`` is past the threshold and the
        title matches a back-matter keyword.
    """
    at_or_before_threshold = current_percent <= skip_after_percent
    return False if at_or_before_threshold else matches_backmatter_title(title, keywords)
|