File size: 8,044 Bytes
46df5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79d7264
46df5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79d7264
46df5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
"""
Cross-reference checker.

Validates that:
- All figures and tables are referenced in text
- All labels have corresponding references
- All references point to a defined label
- Appendix sections are referenced in main text
"""
import re
from typing import List, Set, Tuple

from .base import BaseChecker, CheckResult, CheckSeverity


class ReferenceChecker(BaseChecker):
    """Check cross-reference integrity in a LaTeX document.

    Three kinds of findings are produced by :meth:`check`:

    * labels that are never referenced,
    * references whose label is never defined,
    * appendix section labels that are not referenced before the appendix.

    The helpers ``_is_commented``, ``_find_line_number``,
    ``_get_line_content`` and ``_create_result`` are not defined here; they
    are expected to be inherited from ``BaseChecker``.
    """

    name = "reference"
    display_name = "Cross-References"
    description = "Verify all figures, tables, and sections are properly referenced"

    # Label pattern: \label{prefix:name}
    LABEL_PATTERN = re.compile(r'\\label\{([^}]+)\}')

    # Reference patterns. The optional ``*`` covers the starred (no-hyperlink)
    # variants offered by hyperref / nameref / cleveref, e.g. \ref*{fig:a}.
    REF_PATTERNS = [
        re.compile(r'\\ref\*?\{([^}]+)\}'),
        re.compile(r'\\autoref\*?\{([^}]+)\}'),
        re.compile(r'\\cref\*?\{([^}]+)\}'),
        re.compile(r'\\Cref\*?\{([^}]+)\}'),
        re.compile(r'\\eqref\{([^}]+)\}'),
        re.compile(r'\\pageref\*?\{([^}]+)\}'),
        re.compile(r'\\nameref\*?\{([^}]+)\}'),
        re.compile(r'\\Sref\{([^}]+)\}'),
    ]

    # Appendix detection
    APPENDIX_START_PATTERN = re.compile(r'\\appendix\b|\\begin\{appendix\}')
    SECTION_PATTERN = re.compile(r'\\section\*?\{([^}]+)\}')

    # Lines defining macros contain template references (\ref{#1}) that are
    # not real cross-references.
    _COMMAND_DEF_PATTERN = re.compile(r'\\(new|renew|provide)command')

    # Label prefix (text before the first ':') -> human-readable element type.
    _LABEL_TYPES = {
        'fig': 'figure',
        'tab': 'table',
        'sec': 'section',
        'eq': 'equation',
        'alg': 'algorithm',
        'lst': 'listing',
        'app': 'appendix',
    }

    def check(self, tex_content: str, config: dict = None) -> List[CheckResult]:
        """Run all cross-reference checks on ``tex_content``.

        Args:
            tex_content: Full LaTeX source to inspect.
            config: Accepted for interface compatibility; not read here.

        Returns:
            One failed result per unreferenced label, per reference to an
            undefined label, and per appendix section label that is not
            referenced in the main text. Empty when nothing is wrong.
        """
        results = []

        labels = self._extract_labels(tex_content)
        references = self._extract_references(tex_content)

        # Labels that nothing points to.
        for label, (line_num, line_content) in labels.items():
            if label in references:
                continue
            label_type = self._get_label_type(label)
            results.append(self._create_result(
                passed=False,
                severity=self._get_severity_for_label(label),
                message=f"Unreferenced {label_type}: '{label}'",
                line_number=line_num,
                line_content=line_content,
                suggestion=f"Add \\ref{{{label}}} or \\autoref{{{label}}} where appropriate"
            ))

        # References that point to nothing.
        for ref, (line_num, line_content) in references.items():
            if ref in labels:
                continue
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.ERROR,
                message=f"Reference to undefined label: '{ref}'",
                line_number=line_num,
                line_content=line_content,
                suggestion=f"Add \\label{{{ref}}} to the target element or fix the reference"
            ))

        results.extend(
            self._check_appendix_references(tex_content, labels, references)
        )
        return results

    def _extract_labels(self, content: str) -> dict:
        """Map each non-commented label to ``(line_number, line_content)``.

        When a label is defined more than once, the last definition wins.
        """
        labels = {}
        for match in self.LABEL_PATTERN.finditer(content):
            if self._is_commented(content, match.start()):
                continue
            line_num = self._find_line_number(content, match.start())
            labels[match.group(1)] = (
                line_num,
                self._get_line_content(content, line_num),
            )
        return labels

    def _extract_references(self, content: str) -> dict:
        """Map each referenced key to ``(line_number, line_content)``.

        Only the first recorded occurrence of a key is kept; patterns are
        tried in ``REF_PATTERNS`` order, so "first" is per pattern, not
        strictly by document position. Commented-out references, references
        on macro-definition lines, and macro parameters such as ``#1`` are
        ignored.
        """
        references = {}
        for pattern in self.REF_PATTERNS:
            for match in pattern.finditer(content):
                if self._is_commented(content, match.start()):
                    continue

                # Location is the same for every key in this command, so
                # resolve it once per match.
                line_num = self._find_line_number(content, match.start())
                line_content = self._get_line_content(content, line_num)
                if self._COMMAND_DEF_PATTERN.search(line_content):
                    continue

                # Handle comma-separated refs like \cref{fig:a,fig:b}
                for ref in match.group(1).split(','):
                    ref = ref.strip()
                    if not ref or ref in references:
                        continue
                    # Skip if ref looks like a command parameter (#1, #2)
                    if ref.startswith('#') and len(ref) == 2 and ref[1].isdigit():
                        continue
                    references[ref] = (line_num, line_content)
        return references

    def _get_label_type(self, label: str) -> str:
        """Return the element type implied by the label prefix.

        The prefix is the text before the first ':' (case-insensitive).
        Labels without ':' or with an unknown prefix yield ``'label'``.
        """
        prefix, colon, _ = label.partition(':')
        if not colon:
            return 'label'
        return self._LABEL_TYPES.get(prefix.lower(), 'label')

    def _get_severity_for_label(self, label: str) -> CheckSeverity:
        """Return the severity to use for an unreferenced ``label``.

        Equations might not always need an explicit reference, so they are
        reported as INFO; every other label type is a WARNING.
        """
        if self._get_label_type(label) == 'equation':
            return CheckSeverity.INFO
        return CheckSeverity.WARNING

    @staticmethod
    def _is_section_like(label: str) -> bool:
        """Tell whether ``label`` names a section or appendix.

        Only the prefix before the first ':' is inspected (the whole label
        when there is no ':'), so that e.g. ``fig:approach`` or
        ``tab:second`` is not mistaken for a section label.
        """
        prefix = label.partition(':')[0].lower()
        return 'sec' in prefix or 'app' in prefix

    def _collect_refs_before(self, content: str, end: int) -> Set[str]:
        """Collect every non-commented reference key found in ``content[:end]``."""
        keys = set()
        for pattern in self.REF_PATTERNS:
            for match in pattern.finditer(content, 0, end):
                if self._is_commented(content, match.start()):
                    continue
                keys.update(key.strip() for key in match.group(1).split(','))
        keys.discard('')
        return keys

    def _check_appendix_references(
        self,
        content: str,
        labels: dict,
        references: dict
    ) -> List[CheckResult]:
        """Check that appendix sections are referenced in main text.

        The appendix starts at the first non-commented match of
        ``APPENDIX_START_PATTERN``; everything before it is the main text.
        Each section-like label defined in the appendix must be referenced
        (exact key match) somewhere in the main text.

        Args:
            content: Full LaTeX source.
            labels: Unused; kept for interface compatibility.
            references: Unused; kept for interface compatibility.

        Returns:
            One WARNING result per unreferenced appendix section label, or
            an empty list when the document has no appendix.
        """
        appendix_start = next(
            (
                m.start()
                for m in self.APPENDIX_START_PATTERN.finditer(content)
                if not self._is_commented(content, m.start())
            ),
            None,
        )
        if appendix_start is None:
            return []

        # Scan the main text once instead of once per appendix label.
        main_refs = self._collect_refs_before(content, appendix_start)

        results = []
        for match in self.LABEL_PATTERN.finditer(content, appendix_start):
            if self._is_commented(content, match.start()):
                continue

            label = match.group(1)
            if not self._is_section_like(label) or label in main_refs:
                continue

            line_num = self._find_line_number(content, match.start())
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.WARNING,
                message=f"Appendix section '{label}' is not referenced in main text",
                line_number=line_num,
                line_content=self._get_line_content(content, line_num),
                suggestion="Add a reference to this appendix section in the main text"
            ))

        return results