Spaces:
Sleeping
Sleeping
| """ | |
| Cross-reference checker. | |
| Validates that: | |
| - All figures and tables are referenced in text | |
| - All labels have corresponding references | |
| - Appendix sections are referenced in main text | |
| """ | |
| import re | |
| from typing import List, Set, Tuple | |
| from .base import BaseChecker, CheckResult, CheckSeverity | |
class ReferenceChecker(BaseChecker):
    """Check cross-reference integrity in a LaTeX document.

    Three kinds of problems are reported:

    * labels that are never referenced,
    * references whose label is never defined,
    * section/appendix labels that appear after the appendix marker but are
      not referenced anywhere before it.
    """

    name = "reference"
    display_name = "Cross-References"
    description = "Verify all figures, tables, and sections are properly referenced"

    # Label pattern: \label{prefix:name}
    LABEL_PATTERN = re.compile(r'\\label\{([^}]+)\}')

    # Reference patterns. Group 1 is the (possibly comma-separated) key list.
    # The optional ``*`` accepts the starred variants (e.g. hyperref's
    # ``\ref*{...}``), which reference a label just like the plain form.
    REF_PATTERNS = [
        re.compile(r'\\ref\*?\{([^}]+)\}'),
        re.compile(r'\\autoref\*?\{([^}]+)\}'),
        re.compile(r'\\cref\*?\{([^}]+)\}'),
        re.compile(r'\\Cref\*?\{([^}]+)\}'),
        re.compile(r'\\eqref\*?\{([^}]+)\}'),
        re.compile(r'\\pageref\*?\{([^}]+)\}'),
        re.compile(r'\\nameref\*?\{([^}]+)\}'),
        re.compile(r'\\Sref\*?\{([^}]+)\}'),
    ]

    # Appendix detection
    APPENDIX_START_PATTERN = re.compile(r'\\appendix\b|\\begin\{appendix\}')
    SECTION_PATTERN = re.compile(r'\\section\*?\{([^}]+)\}')

    # Lines that define a macro; references on such lines are templates,
    # not real references.
    _COMMAND_DEFINITION_PATTERN = re.compile(r'\\(new|renew|provide)command')

    def check(self, tex_content: str, config: dict = None) -> List[CheckResult]:
        """Run all cross-reference checks on ``tex_content``.

        Args:
            tex_content: Full LaTeX source to inspect.
            config: Accepted for interface compatibility; not read here.

        Returns:
            One failed result per unreferenced label, per undefined
            reference, and per appendix section not referenced in main text.
        """
        results = []

        labels = self._extract_labels(tex_content)
        references = self._extract_references(tex_content)

        # Labels that nothing points to.
        for label, (line_num, line_content) in labels.items():
            if label in references:
                continue
            severity = self._get_severity_for_label(label)
            label_type = self._get_label_type(label)
            results.append(self._create_result(
                passed=False,
                severity=severity,
                message=f"Unreferenced {label_type}: '{label}'",
                line_number=line_num,
                line_content=line_content,
                suggestion=f"Add \\ref{{{label}}} or \\autoref{{{label}}} where appropriate"
            ))

        # References that point to nothing.
        for ref, (line_num, line_content) in references.items():
            if ref in labels:
                continue
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.ERROR,
                message=f"Reference to undefined label: '{ref}'",
                line_number=line_num,
                line_content=line_content,
                suggestion=f"Add \\label{{{ref}}} to the target element or fix the reference"
            ))

        results.extend(
            self._check_appendix_references(tex_content, labels, references)
        )
        return results

    def _extract_labels(self, content: str) -> dict:
        """Map each non-commented label to ``(line_number, line_content)``.

        If a label is defined more than once, the last definition wins.
        """
        labels = {}
        for match in self.LABEL_PATTERN.finditer(content):
            if self._is_commented(content, match.start()):
                continue
            line_num = self._find_line_number(content, match.start())
            labels[match.group(1)] = (
                line_num,
                self._get_line_content(content, line_num),
            )
        return labels

    def _iter_reference_keys(
        self,
        content: str,
        start: int = 0,
        end: int = None
    ) -> List[Tuple[str, int]]:
        """Collect ``(key, offset)`` for every non-commented reference key.

        Comma-separated arguments such as ``\\ref{fig:a,fig:b}`` are split
        into individual, whitespace-stripped keys. Only matches that lie
        within ``content[start:end]`` are considered, but offsets are
        absolute positions in ``content``.
        """
        if end is None:
            end = len(content)
        found = []
        for pattern in self.REF_PATTERNS:
            for match in pattern.finditer(content, start, end):
                if self._is_commented(content, match.start()):
                    continue
                for key in match.group(1).split(','):
                    key = key.strip()
                    if key:
                        found.append((key, match.start()))
        return found

    def _extract_references(self, content: str) -> dict:
        """Map each referenced key to ``(line_number, line_content)``.

        Only the first occurrence found for a key is recorded. Macro
        parameters (``#1`` ... ``#9``) and references that sit on a
        ``\\newcommand``/``\\renewcommand``/``\\providecommand`` line are
        skipped because they are templates rather than real references.
        """
        references = {}
        for ref, position in self._iter_reference_keys(content):
            if ref in references:
                continue
            # Skip if ref looks like a command parameter (#1, #2, ...)
            if ref.startswith('#') and len(ref) == 2 and ref[1].isdigit():
                continue
            line_num = self._find_line_number(content, position)
            line_content = self._get_line_content(content, line_num)
            if self._COMMAND_DEFINITION_PATTERN.search(line_content):
                continue
            references[ref] = (line_num, line_content)
        return references

    def _get_label_type(self, label: str) -> str:
        """Return a human-readable type derived from the label's prefix.

        The prefix is the text before the first ``:``; unknown prefixes and
        labels without a ``:`` yield ``'label'``.
        """
        if ':' not in label:
            return 'label'
        type_map = {
            'fig': 'figure',
            'tab': 'table',
            'sec': 'section',
            'eq': 'equation',
            'alg': 'algorithm',
            'lst': 'listing',
            'app': 'appendix',
        }
        prefix = label.split(':')[0].lower()
        return type_map.get(prefix, 'label')

    def _get_severity_for_label(self, label: str) -> CheckSeverity:
        """Return the severity to use for an unreferenced ``label``.

        Equations are reported as INFO because they do not always need an
        explicit reference; everything else (figures and tables included)
        is a WARNING.
        """
        if self._get_label_type(label) == 'equation':
            return CheckSeverity.INFO
        return CheckSeverity.WARNING

    @staticmethod
    def _is_section_label(label: str) -> bool:
        """Tell whether ``label`` looks like a section or appendix label.

        Only the prefix before the first ``:`` is inspected, so that labels
        such as ``fig:apple`` are not mistaken for appendix sections. A
        label without ``:`` is inspected as a whole.
        """
        prefix = label.split(':', 1)[0].lower()
        return 'sec' in prefix or 'app' in prefix

    def _find_appendix_start(self, content: str):
        """Return the offset of the first non-commented appendix marker, or None."""
        for match in self.APPENDIX_START_PATTERN.finditer(content):
            if not self._is_commented(content, match.start()):
                return match.start()
        return None

    def _check_appendix_references(
        self,
        content: str,
        labels: dict,
        references: dict
    ) -> List[CheckResult]:
        """Check that appendix sections are referenced in the main text.

        Args:
            content: Full LaTeX source.
            labels: Unused; kept for interface compatibility.
            references: Unused; kept for interface compatibility.

        Returns:
            One WARNING result for each section/appendix label located after
            the appendix marker whose exact key is not referenced before it.
            Empty when the document has no appendix marker.
        """
        results = []

        appendix_start = self._find_appendix_start(content)
        if appendix_start is None:
            return results

        # Collect main-text reference keys once; exact-key membership avoids
        # treating \ref{sec:app_details} as a reference to 'sec:app'.
        main_text_refs = {
            key for key, _ in self._iter_reference_keys(content, 0, appendix_start)
        }

        # Scan labels from the appendix marker onwards using absolute offsets.
        for match in self.LABEL_PATTERN.finditer(content, appendix_start):
            if self._is_commented(content, match.start()):
                continue
            label = match.group(1)
            if not self._is_section_label(label):
                continue
            if label in main_text_refs:
                continue
            line_num = self._find_line_number(content, match.start())
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.WARNING,
                message=f"Appendix section '{label}' is not referenced in main text",
                line_number=line_num,
                line_content=self._get_line_content(content, line_num),
                suggestion="Add a reference to this appendix section in the main text"
            ))
        return results