|
|
""" |
|
|
Cross-reference checker. |
|
|
|
|
|
Validates that: |
|
|
- All figures and tables are referenced in text |
|
|
- All labels have corresponding references |
|
|
- Appendix sections are referenced in main text |
|
|
""" |
|
|
import re
from typing import Dict, List, Optional, Set, Tuple

from .base import BaseChecker, CheckResult, CheckSeverity
|
|
|
|
|
|
|
|
class ReferenceChecker(BaseChecker):
    """Check cross-reference integrity in a LaTeX document.

    Three kinds of problems are reported:

    * labels that are never referenced,
    * references whose label is never defined, and
    * appendix section labels that are not referenced before the appendix.

    The helpers ``_is_commented``, ``_find_line_number``,
    ``_get_line_content`` and ``_create_result`` are not defined in this
    class; they are presumably inherited from ``BaseChecker``.
    """

    name = "reference"
    display_name = "Cross-References"
    description = "Verify all figures, tables, and sections are properly referenced"

    # Captures the key of every \label{...}.
    LABEL_PATTERN = re.compile(r'\\label\{([^}]+)\}')

    # Referencing commands whose argument is a label key (or a
    # comma-separated list of keys).  The optional ``*`` covers the starred
    # variants offered by hyperref / nameref / cleveref (e.g. ``\ref*{...}``),
    # which reference a label without creating a hyperlink.
    REF_PATTERNS = [
        re.compile(r'\\ref\*?\{([^}]+)\}'),
        re.compile(r'\\autoref\*?\{([^}]+)\}'),
        re.compile(r'\\cref\*?\{([^}]+)\}'),
        re.compile(r'\\Cref\*?\{([^}]+)\}'),
        re.compile(r'\\eqref\{([^}]+)\}'),
        re.compile(r'\\pageref\*?\{([^}]+)\}'),
        re.compile(r'\\nameref\*?\{([^}]+)\}'),
        re.compile(r'\\Sref\{([^}]+)\}'),
    ]

    APPENDIX_START_PATTERN = re.compile(r'\\appendix\b|\\begin\{appendix\}')
    SECTION_PATTERN = re.compile(r'\\section\*?\{([^}]+)\}')

    # Lines defining a macro; a \ref inside such a line is part of the macro
    # body, not a real reference.
    _MACRO_DEFINITION_PATTERN = re.compile(r'\\(new|renew|provide)command')

    # Label prefix (the text before the first ':') -> human-readable type.
    _LABEL_TYPES = {
        'fig': 'figure',
        'tab': 'table',
        'sec': 'section',
        'eq': 'equation',
        'alg': 'algorithm',
        'lst': 'listing',
        'app': 'appendix',
    }

    # Label types that can never denote an appendix *section*, even when the
    # label text happens to contain "sec" or "app" (e.g. ``fig:application``).
    _NON_SECTION_TYPES = frozenset(
        {'figure', 'table', 'equation', 'algorithm', 'listing'}
    )

    def check(self, tex_content: str, config: Optional[dict] = None) -> List[CheckResult]:
        """Run every cross-reference check on ``tex_content``.

        Args:
            tex_content: Full LaTeX source to analyse.
            config: Accepted for interface compatibility; not used here.

        Returns:
            One failed result per unreferenced label, per reference to an
            undefined label, and per appendix section label that is not
            referenced in the main text.
        """
        results: List[CheckResult] = []

        labels = self._extract_labels(tex_content)
        references = self._extract_references(tex_content)

        # Labels that nothing refers to.
        for label, (line_num, line_content) in labels.items():
            if label in references:
                continue
            label_type = self._get_label_type(label)
            results.append(self._create_result(
                passed=False,
                severity=self._get_severity_for_label(label),
                message=f"Unreferenced {label_type}: '{label}'",
                line_number=line_num,
                line_content=line_content,
                suggestion=f"Add \\ref{{{label}}} or \\autoref{{{label}}} where appropriate",
            ))

        # References that point at a label which is never defined.
        for ref, (line_num, line_content) in references.items():
            if ref in labels:
                continue
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.ERROR,
                message=f"Reference to undefined label: '{ref}'",
                line_number=line_num,
                line_content=line_content,
                suggestion=f"Add \\label{{{ref}}} to the target element or fix the reference",
            ))

        results.extend(
            self._check_appendix_references(tex_content, labels, references)
        )
        return results

    def _extract_labels(self, content: str) -> Dict[str, tuple]:
        """Collect every non-commented ``\\label`` key.

        Returns:
            Mapping of label key to a ``(line_num, line_content)`` pair.
            If a key is defined more than once, the last definition wins.
        """
        labels: Dict[str, tuple] = {}
        for match in self.LABEL_PATTERN.finditer(content):
            if self._is_commented(content, match.start()):
                continue
            line_num = self._find_line_number(content, match.start())
            labels[match.group(1)] = (
                line_num,
                self._get_line_content(content, line_num),
            )
        return labels

    def _extract_references(self, content: str) -> Dict[str, tuple]:
        """Collect every label key used by a referencing command.

        Comma-separated arguments (``\\cref{a,b}``) are split into individual
        keys.  Commented-out commands, macro parameters such as ``#1`` and
        commands on a ``\\newcommand``-style line are ignored.

        Returns:
            Mapping of referenced key to the ``(line_num, line_content)``
            pair of its first occurrence in document order.
        """
        # Walk the matches of all patterns in document order so that the
        # recorded location really is the first use of each key, regardless
        # of which referencing command was used.
        matches = sorted(
            (
                match
                for pattern in self.REF_PATTERNS
                for match in pattern.finditer(content)
            ),
            key=lambda match: match.start(),
        )

        references: Dict[str, tuple] = {}
        for match in matches:
            if self._is_commented(content, match.start()):
                continue

            new_refs = [
                ref
                for ref in (part.strip() for part in match.group(1).split(','))
                if ref
                and ref not in references
                and not self._is_macro_parameter(ref)
            ]
            if not new_refs:
                # Nothing new here; skip the line lookup entirely.
                continue

            line_num = self._find_line_number(content, match.start())
            line_content = self._get_line_content(content, line_num)
            if self._MACRO_DEFINITION_PATTERN.search(line_content):
                continue

            for ref in new_refs:
                # setdefault also copes with a key repeated inside one
                # argument list, e.g. \cref{a,a}.
                references.setdefault(ref, (line_num, line_content))
        return references

    @staticmethod
    def _is_macro_parameter(ref: str) -> bool:
        """Return True for a macro parameter placeholder such as ``#1``."""
        return len(ref) == 2 and ref[0] == '#' and ref[1].isdigit()

    def _get_label_type(self, label: str) -> str:
        """Map a label to a readable type using its ``prefix:`` part.

        Labels without a colon, or with an unknown prefix, yield ``'label'``.
        """
        prefix, separator, _ = label.partition(':')
        if not separator:
            return 'label'
        return self._LABEL_TYPES.get(prefix.lower(), 'label')

    def _get_severity_for_label(self, label: str) -> CheckSeverity:
        """Return the severity used when ``label`` is unreferenced.

        Equation labels are reported as INFO; every other label type
        (figures and tables included) is reported as WARNING.
        """
        if self._get_label_type(label) == 'equation':
            return CheckSeverity.INFO
        return CheckSeverity.WARNING

    def _is_appendix_section_label(self, label: str) -> bool:
        """Heuristically decide whether ``label`` names an appendix section.

        The label must contain ``sec`` or ``app`` (case-insensitive) and must
        not carry a prefix that identifies it as a figure, table, equation,
        algorithm or listing.
        """
        if self._get_label_type(label) in self._NON_SECTION_TYPES:
            return False
        lowered = label.lower()
        return 'sec' in lowered or 'app' in lowered

    def _check_appendix_references(
        self,
        content: str,
        labels: dict,
        references: dict
    ) -> List[CheckResult]:
        """Check that appendix sections are referenced in the main text.

        The document is split at the first non-commented ``\\appendix`` or
        ``\\begin{appendix}``.  Every section-like label defined after that
        point must be referenced, by exact key, somewhere before it.

        Args:
            content: Full LaTeX source.
            labels: Unused; kept for interface compatibility.
            references: Unused; kept for interface compatibility.

        Returns:
            One WARNING result per appendix section label that the main text
            never references; an empty list when there is no appendix.
        """
        results: List[CheckResult] = []

        # A commented-out \appendix must not be taken as the appendix start.
        appendix_start = next(
            (
                match.start()
                for match in self.APPENDIX_START_PATTERN.finditer(content)
                if not self._is_commented(content, match.start())
            ),
            None,
        )
        if appendix_start is None:
            return results

        # Scan the main text once; exact-key lookup avoids treating
        # \ref{app:ab} as a reference to the label "app:a".
        main_references = self._extract_references(content[:appendix_start])
        appendix_content = content[appendix_start:]

        reported = set()
        for match in self.LABEL_PATTERN.finditer(appendix_content):
            position = appendix_start + match.start()
            if self._is_commented(content, position):
                continue

            label = match.group(1)
            if label in reported or label in main_references:
                continue
            if not self._is_appendix_section_label(label):
                continue
            reported.add(label)

            line_num = self._find_line_number(content, position)
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.WARNING,
                message=f"Appendix section '{label}' is not referenced in main text",
                line_number=line_num,
                line_content=self._get_line_content(content, line_num),
                suggestion="Add a reference to this appendix section in the main text",
            ))

        return results
|
|
|