File size: 3,193 Bytes
46df5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""
Number and unit formatting checker.

Validates:
- Percentage format consistency (no space before %, consistent use of % vs 'percent')
"""
import re
from typing import List

from .base import BaseChecker, CheckResult, CheckSeverity


class NumberChecker(BaseChecker):
    """Check percentage formatting in LaTeX source text.

    Two checks are performed by :meth:`check`:

    * whitespace between a digit and the percent sign ("50 \\%"), reported
      once per occurrence as a WARNING;
    * mixed use of the percent symbol and the word "percent" after numbers,
      reported once per document as an INFO result.
    """

    name = "number"
    display_name = "Numbers & Units"
    description = "Check percentage formatting"

    # In LaTeX a literal percent sign is written "\%"; a bare "%" starts a
    # comment and is stripped before these patterns are applied.  The optional
    # backslash is therefore required for the patterns to match anything at
    # all in the comment-stripped text (a bare "%" is still accepted so the
    # patterns stay usable on raw, non-LaTeX strings).
    PERCENT_WITH_SPACE = re.compile(r'\d\s+\\?%')  # "50 \%" is wrong

    # Inconsistent percentage usage
    PERCENT_WORD = re.compile(r'\d+\s+percent\b', re.IGNORECASE)
    PERCENT_SYMBOL = re.compile(r'\d+\\?%')

    # Lines containing any of these commands are treated as math formulas.
    _MATH_COMMAND = re.compile(
        r'\\(frac|sum|prod|int|partial|nabla|approx|neq|leq|geq|log|ln|exp|sin|cos|tan|alpha|beta|gamma|delta|theta|sigma|omega|left|right)'
    )

    # An unescaped "%" and everything after it on the line (a LaTeX comment).
    _TRAILING_COMMENT = re.compile(r'(?<!\\)%.*$')

    # Contexts in which number rules are not applied (see _in_special_context).
    _SPECIAL_CONTEXTS = tuple(re.compile(p) for p in (
        r'\\begin\{(tabular|array|equation|align|gather)',
        r'\\includegraphics',
        r'\\caption',
        r'\\label',
        r'\\ref',
        r'^\s*&',  # Table cell
        r'\$.*\$',  # Inline math
    ))

    def check(self, tex_content: str, config: dict = None) -> List[CheckResult]:
        """Scan ``tex_content`` line by line for percentage formatting issues.

        Args:
            tex_content: Full LaTeX source as a single string.
            config: Accepted for interface compatibility; not read here.

        Returns:
            A list with one WARNING result per "digit, whitespace, percent
            sign" occurrence (carrying the 1-based line number), followed by
            at most one INFO result when both the symbol and the word
            "percent" are used after numbers.
        """
        results = []

        # Track percentage style for the document-wide consistency check.
        uses_symbol = False
        uses_word = False

        for line_num, line in enumerate(tex_content.split('\n'), 1):
            # Comment lines are detected by a helper that is not defined in
            # this class (presumably inherited from BaseChecker).
            if self._is_comment_line(line):
                continue

            # Skip lines that are likely in math/tables.
            if self._in_special_context(line):
                continue

            # Skip lines that look like math formulas.
            if self._MATH_COMMAND.search(line):
                continue

            # Drop trailing comments so commented-out text is not checked.
            # Only escaped percent signs ("\%") survive this step.
            line_content = self._TRAILING_COMMENT.sub('', line)

            # One warning per occurrence of whitespace before the percent sign.
            for _ in self.PERCENT_WITH_SPACE.finditer(line_content):
                results.append(self._create_result(
                    passed=False,
                    severity=CheckSeverity.WARNING,
                    message="Space before percent sign",
                    line_number=line_num,
                    suggestion="Remove space: '50%' not '50 %'"
                ))

            # Track percentage style
            if self.PERCENT_WORD.search(line_content):
                uses_word = True
            if self.PERCENT_SYMBOL.search(line_content):
                uses_symbol = True

        # Check percentage consistency
        if uses_word and uses_symbol:
            results.append(self._create_result(
                passed=False,
                severity=CheckSeverity.INFO,
                message="Mixed percentage notation: both '%' and 'percent' used",
                suggestion="Use consistent notation throughout the paper"
            ))

        return results

    def _in_special_context(self, line: str) -> bool:
        """Return True if ``line`` matches a context where number rules don't apply.

        The contexts are table/array/equation-like environment openers,
        ``\\includegraphics``, ``\\caption``, ``\\label``, ``\\ref``, lines
        starting with ``&`` (table cells), and lines containing a pair of
        ``$`` characters (inline math).
        """
        return any(p.search(line) for p in self._SPECIAL_CONTEXTS)