File size: 4,413 Bytes
46df5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Sentence quality checker.

Validates:
- Weak sentence starters
- Common writing issues
"""
import re
from typing import List

from .base import BaseChecker, CheckResult, CheckSeverity


class SentenceChecker(BaseChecker):
    """Check sentence quality and readability.

    The source text is scanned one line at a time. Each non-comment line is
    tested against three pattern tables (weak starters, weasel/hedging
    phrases, redundant phrases) and every hit is reported as an
    INFO-severity result. All patterns are matched case-insensitively.
    """

    name = "sentence"
    display_name = "Sentence Quality"
    description = "Check weak patterns and writing issues"

    # Weak sentence starters (avoid these).
    # Each entry is (regex, message). The regexes are anchored with ``^``,
    # so ``check`` strips leading whitespace before applying them.
    WEAK_STARTERS = [
        (r'^There\s+(is|are|was|were|has been|have been)\s+',
         "Weak start with 'There is/are'"),
        (r'^It\s+(is|was|has been|should be noted)\s+',
         "Weak start with 'It is'"),
        (r'^This\s+(is|was|shows|demonstrates)\s+',
         "Vague 'This' without clear antecedent"),
        (r'^As\s+(mentioned|discussed|shown|noted)\s+(above|before|earlier|previously)',
         "Consider being more specific about what was mentioned"),
    ]

    # Weasel words and hedging.
    # Each entry is (regex, suggestion).
    WEASEL_PATTERNS = [
        # ``suggest(?:ed|s)?`` rather than ``suggested?``: the latter needs
        # the literal text "suggeste" and so misses "studies suggest".
        (r'\b(many|some|most|several)\s+(researchers?|studies|papers?|works?)\s+(have\s+)?(shown?|demonstrated?|suggest(?:ed|s)?|believe)',
         "Vague attribution - consider citing specific work"),
        (r'\b(obviously|clearly|of course|needless to say|it is well known)\b',
         "Unsupported assertion - consider citing or removing"),
        # Leading ``\b`` stops "every important" being read as "very important".
        (r'\b(very|really|quite|extremely|highly)\s+(important|significant|good|effective)',
         "Consider more precise language"),
        (r'\bit\s+is\s+(important|crucial|essential|necessary)\s+to\s+note\s+that',
         "Wordy phrase - consider simplifying"),
    ]

    # Redundant phrases.
    # Each entry is (regex, suggested replacement).
    REDUNDANT_PATTERNS = [
        (r'\bin order to\b', "Use 'to' instead of 'in order to'"),
        (r'\bdue to the fact that\b', "Use 'because' instead"),
        (r'\bat this point in time\b', "Use 'now' or 'currently'"),
        (r'\bin the event that\b', "Use 'if' instead"),
        (r'\bdespite the fact that\b', "Use 'although' instead"),
        (r'\bfor the purpose of\b', "Use 'to' or 'for' instead"),
        (r'\bwith the exception of\b', "Use 'except' instead"),
        (r'\bin close proximity to\b', "Use 'near' instead"),
        (r'\ba large number of\b', "Use 'many' instead"),
        (r'\bthe vast majority of\b', "Use 'most' instead"),
    ]

    def check(self, tex_content: str, config: dict = None) -> List[CheckResult]:
        """Scan ``tex_content`` line by line for weak writing patterns.

        Args:
            tex_content: Full text to check; it is split on ``'\\n'`` and
                lines are numbered from 1.
            config: Accepted for interface compatibility; this method does
                not read it.

        Returns:
            One failed, INFO-severity result per finding: at most one
            weak-starter finding per line, plus one finding for every
            weasel or redundant pattern that matches the line.
        """
        results = []

        for line_num, line in enumerate(tex_content.split('\n'), 1):
            # Skip commented lines using base class method
            if self._is_comment_line(line):
                continue

            # Remove inline comments using base class method
            line_content = self._remove_line_comment(line)

            # The starter patterns are anchored at ``^``; drop indentation so
            # an indented line is still recognised as starting the sentence.
            starter_text = line_content.lstrip()
            for pattern, message in self.WEAK_STARTERS:
                if re.search(pattern, starter_text, re.IGNORECASE):
                    results.append(self._create_result(
                        passed=False,
                        severity=CheckSeverity.INFO,
                        message=message,
                        line_number=line_num,
                        line_content=line.strip()[:80]
                    ))
                    break  # One per line

            # Check weasel words; the matched text is capped at 30 characters
            # in the message.
            for pattern, message in self.WEASEL_PATTERNS:
                match = re.search(pattern, line_content, re.IGNORECASE)
                if match:
                    results.append(self._create_result(
                        passed=False,
                        severity=CheckSeverity.INFO,
                        message=f"Hedging language: '{match.group(0)[:30]}'",
                        line_number=line_num,
                        suggestion=message
                    ))

            # Check redundant phrases
            for pattern, message in self.REDUNDANT_PATTERNS:
                match = re.search(pattern, line_content, re.IGNORECASE)
                if match:
                    results.append(self._create_result(
                        passed=False,
                        severity=CheckSeverity.INFO,
                        message=f"Redundant phrase: '{match.group(0)}'",
                        line_number=line_num,
                        suggestion=message
                    ))

        return results