from __future__ import annotations

import re
from pathlib import Path

# Chapter number -> chapter title. Numbers that are not listed here fall back
# to a title derived from the file name (see ``pdf_metadata``).
CHAPTER_CATALOG = {
    1: "Relations and Functions",
    2: "Inverse Trigonometric Functions",
    3: "Matrices",
    4: "Determinants",
    5: "Continuity and Differentiability",
    6: "Application of Derivatives",
    7: "Integrals",
    8: "Application of Integrals",
    9: "Differential Equations",
    10: "Vector Algebra",
    11: "Three Dimensional Geometry",
    12: "Linear Programming",
    13: "Probability",
}

# "chapter", then any run of whitespace / underscores / hyphens, then digits.
# Underscores and hyphens are accepted because file names commonly use them
# in place of spaces (clean_title below treats "_" as a word separator too).
_CHAPTER_NUMBER_RE = re.compile(r"chapter[\s_-]*([0-9]+)", re.IGNORECASE)


def parse_chapter_number(file_path: Path) -> int | None:
    """Extract the chapter number from the stem of *file_path*.

    The first case-insensitive occurrence of ``chapter`` followed by digits is
    used; the two may be separated by whitespace, underscores or hyphens
    (``chapter3``, ``Chapter 3``, ``chapter_3``, ``CHAPTER-3``).

    Returns:
        The chapter number, or ``None`` when the stem has no such pattern.
    """
    match = _CHAPTER_NUMBER_RE.search(file_path.stem)
    if match is None:
        return None
    return int(match.group(1))


def clean_title(text: str) -> str:
    """Turn a file-name-like string into a display title.

    Underscores become spaces, runs of whitespace collapse to a single space,
    surrounding whitespace is removed and the result is title-cased with
    ``str.title``.
    """
    spaced = text.replace("_", " ")
    collapsed = re.sub(r"\s+", " ", spaced).strip()
    return collapsed.title()


def pdf_metadata(file_path: Path) -> dict[str, str | int]:
    """Build the metadata record for a chapter PDF.

    Returns:
        A dict with three keys:

        * ``"chapter_number"`` - the number parsed from the file name, or
          ``-1`` when none could be parsed.
        * ``"chapter_name"`` - the ``CHAPTER_CATALOG`` title for that number,
          or a cleaned-up version of the file stem when the number is missing
          or not in the catalog.
        * ``"source_file"`` - the file name without its directory.
    """
    chapter_number = parse_chapter_number(file_path)

    # Compare against None explicitly: a parsed chapter 0 is a real value and
    # must not be confused with "no chapter number found".
    chapter_name = None
    if chapter_number is not None:
        chapter_name = CHAPTER_CATALOG.get(chapter_number)
    if chapter_name is None:
        chapter_name = clean_title(file_path.stem)

    return {
        "chapter_number": -1 if chapter_number is None else chapter_number,
        "chapter_name": chapter_name,
        "source_file": file_path.name,
    }