Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import re | |
| from pathlib import Path | |
# Lookup table: chapter number -> chapter title.
# Titles are listed in chapter order and numbered consecutively from 1,
# so inserting or reordering an entry renumbers every chapter after it.
CHAPTER_CATALOG = dict(
    enumerate(
        (
            "Relations and Functions",
            "Inverse Trigonometric Functions",
            "Matrices",
            "Determinants",
            "Continuity and Differentiability",
            "Application of Derivatives",
            "Integrals",
            "Application of Integrals",
            "Differential Equations",
            "Vector Algebra",
            "Three Dimensional Geometry",
            "Linear Programming",
            "Probability",
        ),
        start=1,
    )
)
def parse_chapter_number(file_path: Path) -> int | None:
    """Extract the chapter number embedded in a file name.

    Searches the file's stem (name without the final suffix) for the word
    "chapter", in any letter case, followed by a run of ASCII digits. The
    word and the digits may be adjacent or separated by any mix of
    whitespace, underscores, hyphens, or dots, so ``chapter3``,
    ``Chapter 03``, ``chapter_3_matrices`` and ``CHAPTER-3`` all yield ``3``.

    Args:
        file_path: Path whose stem is inspected; the file is never opened.

    Returns:
        The first chapter number found, as an ``int`` (leading zeros are
        dropped), or ``None`` when the stem contains no such pattern.
    """
    # Allow the common file-name separators between the word and the digits,
    # not just whitespace, so underscore/hyphen-style names are recognised.
    match = re.search(
        r"chapter[\s._-]*([0-9]+)",
        file_path.stem,
        re.IGNORECASE,
    )
    if match is None:
        return None
    return int(match.group(1))
def clean_title(text: str) -> str:
    """Turn a raw, file-name-like string into a title-cased label.

    Underscores are treated as spaces, every run of whitespace is collapsed
    to a single space, leading and trailing whitespace is dropped, and the
    result is title-cased with ``str.title``.

    Args:
        text: The raw string to tidy up.

    Returns:
        The normalised, title-cased string; empty if ``text`` holds only
        whitespace and/or underscores.
    """
    spaced = text.replace("_", " ")
    # split() with no argument discards leading/trailing whitespace and
    # splits on whitespace runs, so re-joining collapses and trims at once.
    words = spaced.split()
    return " ".join(words).title()
def pdf_metadata(file_path: Path) -> dict[str, str | int]:
    """Build the metadata record for one source file.

    The chapter number is parsed from the file name. The chapter name comes
    from ``CHAPTER_CATALOG`` when that number is catalogued; otherwise it
    falls back to a cleaned-up, title-cased version of the file stem.

    Args:
        file_path: Path of the source file; only its name is inspected.

    Returns:
        A dict with three keys:

        * ``"chapter_number"`` - the parsed number, or ``-1`` when the file
          name contains no chapter number.
        * ``"chapter_name"`` - the catalogue title or the cleaned file stem.
        * ``"source_file"`` - the file name including its suffix.
    """
    chapter_number = parse_chapter_number(file_path)

    # Test against None explicitly: a parsed chapter 0 is falsy, and a plain
    # truthiness check would misreport it as the "not found" sentinel -1.
    chapter_name = (
        None if chapter_number is None else CHAPTER_CATALOG.get(chapter_number)
    )
    if chapter_name is None:
        chapter_name = clean_title(file_path.stem)

    return {
        "chapter_number": -1 if chapter_number is None else chapter_number,
        "chapter_name": chapter_name,
        "source_file": file_path.name,
    }