| | """ |
| | Citation quality checker. |
| | |
| | Validates: |
| | - Old citations (>30 years) that might need updating |
| | - Citation formatting patterns (et al., hardcoded citations, etc.) |
| | """ |
| | import re |
| | from typing import List, Dict |
| | from datetime import datetime |
| | from collections import defaultdict |
| |
|
| | from .base import BaseChecker, CheckResult, CheckSeverity |
| |
|
| |
|
class CitationQualityChecker(BaseChecker):
    """Check citation age and citation formatting in LaTeX source text.

    Two independent, line-based heuristics are applied:

    * Years written in the running text that are at least
      ``OLD_CITATION_YEARS`` old are reported, once per distinct year.
    * Common formatting slips are reported per line: "et al" without its
      period, bracketed numeric references such as ``[12]``, and hand-typed
      "(Author, 2020)" citations on lines that contain no cite command.
    """

    name = "citation_quality"
    display_name = "Citation Quality"
    description = "Check citation age, balance, and formatting"

    # Minimum age, in years, at which a year found in the text is reported.
    OLD_CITATION_YEARS = 30

    # Evaluated once, when the class body is executed.
    CURRENT_YEAR = datetime.now().year

    # A stand-alone four-digit year (1800-2099), optionally followed by a
    # bibliography disambiguation letter ("1985a").  The range is deliberately
    # wider than the age threshold: the threshold decides what is "old", the
    # pattern only decides what looks like a year.  "1980s" is not matched.
    _YEAR_RE = re.compile(r'\b(1[89]\d{2}|20\d{2})[a-e]?\b')

    # "et al" that is not immediately followed by a period.
    _ET_AL_RE = re.compile(r'\bet al\b(?!\.)')

    # Bracketed number, e.g. "[12]".
    _NUMERIC_REF_RE = re.compile(r'\[\d+\]')

    # Command directly followed by a numeric optional argument, e.g. "\foo[2]".
    _COMMAND_OPT_ARG_RE = re.compile(r'\\[a-zA-Z]+\[\d+\]')

    # Command with a brace argument, e.g. "\section{".
    _COMMAND_BRACE_RE = re.compile(r'\\[a-zA-Z]+\{')

    # Hand-typed author-year citation, e.g. "(Smith et al., 2020)".
    _HARDCODED_CITE_RE = re.compile(
        r'\([A-Z][a-z]+(?:\s+et\s+al\.?)?,?\s*\d{4}\)'
    )

    # Macro definitions legitimately contain "[n]" as an argument count.
    _MACRO_DEFINITIONS = ('\\newcommand', '\\renewcommand', '\\def')

    def check(self, tex_content: str, config: dict = None) -> List[CheckResult]:
        """Run every citation check over *tex_content*.

        Args:
            tex_content: Full LaTeX source as a single string.
            config: Accepted for interface compatibility; not read here.

        Returns:
            Old-citation findings followed by formatting findings.
        """
        results = []
        results.extend(self._check_old_citations_in_text(tex_content))
        results.extend(self._check_citation_formatting(tex_content))
        return results

    def _check_old_citations_in_text(self, content: str) -> List[CheckResult]:
        """Report years in the text that are at least OLD_CITATION_YEARS old.

        Every year on every non-comment line is examined, so a parenthetical
        such as "(Smith 2015; Jones 1985)" yields a finding for 1985 even
        though a newer year precedes it.  Each distinct year is reported only
        once, at the first line on which it appears.

        Args:
            content: Full LaTeX source as a single string.

        Returns:
            One INFO result per distinct old year, in order of appearance.
        """
        results = []
        reported_years = set()

        for line_num, line in enumerate(content.split('\n'), 1):
            # _is_comment_line is not defined in this class; it is expected
            # to be inherited from BaseChecker.
            if self._is_comment_line(line):
                continue

            for match in self._YEAR_RE.finditer(line):
                year = int(match.group(1))
                age = self.CURRENT_YEAR - year

                if age < self.OLD_CITATION_YEARS or year in reported_years:
                    continue

                reported_years.add(year)
                results.append(self._create_result(
                    passed=False,
                    severity=CheckSeverity.INFO,
                    message=f"Citation from {year} ({age} years old)",
                    line_number=line_num,
                    suggestion="Consider if there's more recent work on this topic"
                ))

        return results

    def _check_citation_formatting(self, content: str) -> List[CheckResult]:
        """Check each non-comment line for common citation formatting issues.

        The three checks are independent of one another: a line excluded from
        the numeric-citation check (for example a macro definition) is still
        examined for hardcoded author-year citations.

        Args:
            content: Full LaTeX source as a single string.

        Returns:
            Findings in line order; a single line may produce several.
        """
        results = []

        for line_num, line in enumerate(content.split('\n'), 1):
            if line.lstrip().startswith('%'):
                continue

            if self._ET_AL_RE.search(line):
                results.append(self._create_result(
                    passed=False,
                    severity=CheckSeverity.WARNING,
                    message="'et al' should be 'et al.'",
                    line_number=line_num,
                    suggestion="Add period after 'et al.'"
                ))

            if self._has_numeric_citation(line):
                results.append(self._create_result(
                    passed=False,
                    severity=CheckSeverity.INFO,
                    message="Numeric citation style detected",
                    line_number=line_num,
                    suggestion="Consider author-year style for better readability"
                ))

            if self._HARDCODED_CITE_RE.search(line) and '\\cite' not in line:
                results.append(self._create_result(
                    passed=False,
                    severity=CheckSeverity.WARNING,
                    message="Appears to be hardcoded citation instead of \\cite",
                    line_number=line_num,
                    line_content=line.strip()[:80],
                    suggestion="Use \\cite{} for proper bibliography management"
                ))

        return results

    def _has_numeric_citation(self, line: str) -> bool:
        """Return True when *line* appears to contain a typed "[n]" citation.

        Lines are excluded when the bracketed number is more plausibly LaTeX
        syntax than a reference: macro definitions, commands taking a numeric
        optional argument, lines that already use a cite command, and lines
        with a brace-argument command within their first 20 characters.
        """
        if not self._NUMERIC_REF_RE.search(line):
            return False
        if any(macro in line for macro in self._MACRO_DEFINITIONS):
            return False
        if self._COMMAND_OPT_ARG_RE.search(line):
            return False
        if '\\cite' in line:
            return False
        return not self._COMMAND_BRACE_RE.search(line[:20])
| |
|