|
|
"""
Sentence quality checker.

Validates:
- Weak sentence starters
- Common writing issues (hedging, vague attribution, redundant phrases)
"""
|
|
import re |
|
|
from typing import List |
|
|
|
|
|
from .base import BaseChecker, CheckResult, CheckSeverity |
|
|
|
|
|
|
|
|
class SentenceChecker(BaseChecker):
    """Check sentence quality and readability.

    The text is scanned line by line against three pattern tables:

    * ``WEAK_STARTERS``      - weak or vague openers at the start of a line.
    * ``WEASEL_PATTERNS``    - vague attribution, unsupported assertions,
      imprecise intensifiers and wordy "it is important to note" phrases.
    * ``REDUNDANT_PATTERNS`` - wordy phrases that have a shorter equivalent.

    Every table is a list of ``(regex, message)`` string pairs; all patterns
    are applied case-insensitively. Every finding is reported with
    ``CheckSeverity.INFO``.
    """

    name = "sentence"
    display_name = "Sentence Quality"
    description = "Check weak patterns and writing issues"

    # Anchored with '^': these only fire at the very start of the text they
    # are matched against (see ``check`` for how indentation is handled).
    WEAK_STARTERS = [
        (r'^There\s+(is|are|was|were|has been|have been)\s+',
         "Weak start with 'There is/are'"),
        (r'^It\s+(is|was|has been|should be noted)\s+',
         "Weak start with 'It is'"),
        (r'^This\s+(is|was|shows|demonstrates)\s+',
         "Vague 'This' without clear antecedent"),
        (r'^As\s+(mentioned|discussed|shown|noted)\s+(above|before|earlier|previously)',
         "Consider being more specific about what was mentioned"),
    ]

    # The message of each pair is reported as the *suggestion*; the result
    # message itself quotes the matched text.
    WEASEL_PATTERNS = [
        (r'\b(many|some|most|several)\s+(researchers?|studies|papers?|works?)\s+(have\s+)?(shown?|demonstrated?|suggested?|believe)',
         "Vague attribution - consider citing specific work"),
        (r'\b(obviously|clearly|of course|needless to say|it is well known)\b',
         "Unsupported assertion - consider citing or removing"),
        (r'\b(very|really|quite|extremely|highly)\s+(important|significant|good|effective)',
         "Consider more precise language"),
        (r'\bit\s+is\s+(important|crucial|essential|necessary)\s+to\s+note\s+that',
         "Wordy phrase - consider simplifying"),
    ]

    # As above, the message of each pair is reported as the suggestion.
    REDUNDANT_PATTERNS = [
        (r'\bin order to\b', "Use 'to' instead of 'in order to'"),
        (r'\bdue to the fact that\b', "Use 'because' instead"),
        (r'\bat this point in time\b', "Use 'now' or 'currently'"),
        (r'\bin the event that\b', "Use 'if' instead"),
        (r'\bdespite the fact that\b', "Use 'although' instead"),
        (r'\bfor the purpose of\b', "Use 'to' or 'for' instead"),
        (r'\bwith the exception of\b', "Use 'except' instead"),
        (r'\bin close proximity to\b', "Use 'near' instead"),
        (r'\ba large number of\b', "Use 'many' instead"),
        (r'\bthe vast majority of\b', "Use 'most' instead"),
    ]

    def check(self, tex_content: str, config: dict = None) -> List[CheckResult]:
        """Scan ``tex_content`` line by line and report writing issues.

        Args:
            tex_content: Full text to analyse; it is split on ``'\\n'``.
            config: Accepted for interface compatibility; not read here.

        Returns:
            One failed, INFO-severity result per finding, in line order.
            Per line: at most one weak-starter finding (the first pattern
            that matches wins), plus one finding for every hedging pattern
            and every redundant-phrase pattern that matches.
        """
        results = []

        for line_num, line in enumerate(tex_content.split('\n'), 1):
            # Both helpers are not defined in this class (presumably
            # inherited from BaseChecker).
            if self._is_comment_line(line):
                continue

            line_content = self._remove_line_comment(line)

            # The starter patterns are anchored with '^', so leading
            # indentation would otherwise hide every indented sentence.
            opening = line_content.lstrip()
            if not opening:
                # Nothing left once the comment is removed; no pattern can
                # match an empty string.
                continue

            # Same excerpt for every finding on this line.
            snippet = line.strip()[:80]

            for pattern, message in self.WEAK_STARTERS:
                if re.search(pattern, opening, re.IGNORECASE):
                    results.append(self._create_result(
                        passed=False,
                        severity=CheckSeverity.INFO,
                        message=message,
                        line_number=line_num,
                        line_content=snippet,
                    ))
                    # Report only the first weak starter per line.
                    break

            for pattern, message in self.WEASEL_PATTERNS:
                match = re.search(pattern, line_content, re.IGNORECASE)
                if match:
                    results.append(self._create_result(
                        passed=False,
                        severity=CheckSeverity.INFO,
                        # Quote at most 30 characters of the matched text.
                        message=f"Hedging language: '{match.group(0)[:30]}'",
                        line_number=line_num,
                        line_content=snippet,
                        suggestion=message,
                    ))

            for pattern, message in self.REDUNDANT_PATTERNS:
                match = re.search(pattern, line_content, re.IGNORECASE)
                if match:
                    results.append(self._create_result(
                        passed=False,
                        severity=CheckSeverity.INFO,
                        message=f"Redundant phrase: '{match.group(0)}'",
                        line_number=line_num,
                        line_content=snippet,
                        suggestion=message,
                    ))

        return results
|
|
|