# Spaces: thinkwee / BibGuard - Running - File size: 6,297 Bytes - 46df5f0

"""
Base checker class for paper submission quality checks.

All specific checkers inherit from BaseChecker and implement
the check() method to validate specific aspects of the TeX document.
"""
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Tuple


class CheckSeverity(Enum):
    """
    Severity levels for check results.

    Each member's value is the lowercase string form of its name; that
    string is what CheckResult.to_dict() emits under the 'severity' key.
    """
    ERROR = "error"         # Must fix before submission
    WARNING = "warning"     # Strongly recommended to fix
    INFO = "info"           # Suggestion or best practice


@dataclass
class CheckResult:
    """
    Outcome record for one check performed by a checker.

    The first four fields are required. The location fields, the
    suggestion and the file path are optional and default to None.
    """
    # Name of the checker that produced this result
    checker_name: str
    # Whether the check passed
    passed: bool
    # Severity level attached to this result
    severity: CheckSeverity
    # Human-readable description of the result
    message: str
    # Optional location and remediation details
    line_number: Optional[int] = None
    line_content: Optional[str] = None
    suggestion: Optional[str] = None
    file_path: Optional[str] = None

    def to_dict(self) -> dict:
        """
        Convert this result into a plain dictionary.

        The severity is emitted as its underlying enum value, and several
        keys are shortened relative to the field names (for example
        'checker' for checker_name and 'line' for line_number).
        """
        pairs = (
            ('checker', self.checker_name),
            ('passed', self.passed),
            ('severity', self.severity.value),
            ('message', self.message),
            ('line', self.line_number),
            ('content', self.line_content),
            ('suggestion', self.suggestion),
            ('file_path', self.file_path),
        )
        return dict(pairs)


class BaseChecker(ABC):
    """
    Abstract base class for all paper submission checkers.

    Each checker validates a specific aspect of the paper,
    such as caption placement, reference integrity, or formatting.
    Subclasses override the metadata attributes below and implement
    check(); the underscore-prefixed helpers provide shared handling of
    LaTeX comments and of line/position bookkeeping.
    """

    # Checker metadata - override in subclasses
    name: str = "base"
    display_name: str = "Base Checker"
    description: str = "Base checker class"

    @abstractmethod
    def check(self, tex_content: str, config: Optional[dict] = None) -> List[CheckResult]:
        """
        Run the check on the given TeX content.

        Args:
            tex_content: The full content of the TeX file
            config: Optional configuration dict (e.g., conference-specific settings)

        Returns:
            List of CheckResult objects describing found issues
        """
        pass

    def _find_comment_start(self, line: str) -> int:
        """
        Locate the first unescaped '%' in a line.

        A '%' counts as escaped when it is directly preceded by an odd
        number of backslashes; an even number (including zero) means the
        backslashes pair up among themselves and the '%' starts a comment.

        Args:
            line: Text to scan (normally a single line without newlines)

        Returns:
            Index of the comment-starting '%', or -1 if there is none
        """
        search_from = 0
        while True:
            percent_at = line.find('%', search_from)
            if percent_at == -1:
                return -1
            # Walk left over the run of backslashes that ends just before '%'
            run_start = percent_at
            while run_start > 0 and line[run_start - 1] == '\\':
                run_start -= 1
            if (percent_at - run_start) % 2 == 0:
                return percent_at
            search_from = percent_at + 1

    def _remove_comments(self, content: str) -> str:
        """
        Remove all LaTeX comments from content.

        Preserves line structure (each line keeps its position; only the
        text from the comment marker onward is dropped).
        Handles escaped percent signs (\\%) correctly.
        """
        return '\n'.join(
            self._remove_line_comment(line) for line in content.split('\n')
        )

    def _remove_line_comment(self, line: str) -> str:
        """Remove comment from a single line, preserving content before %."""
        comment_at = self._find_comment_start(line)
        if comment_at == -1:
            return line
        return line[:comment_at]

    def _is_comment_line(self, line: str) -> bool:
        """Check if a line is entirely a comment (starts with %)."""
        # An empty or whitespace-only line yields '' here, which is not a comment
        return line.lstrip().startswith('%')

    def _get_non_comment_lines(self, content: str) -> List[Tuple[int, str]]:
        """
        Get all non-comment lines with their line numbers.

        Returns:
            List of (line_number, line_content) tuples for non-comment lines.
            Line numbers are 1-based positions in the original content.
            Line content has inline comments removed; lines that are blank
            after comment removal are omitted.
        """
        kept = []
        for line_num, line in enumerate(content.split('\n'), 1):
            # Skip pure comment lines
            if self._is_comment_line(line):
                continue
            cleaned = self._remove_line_comment(line)
            # Keep only lines that still carry text once the comment is gone
            if cleaned.strip():
                kept.append((line_num, cleaned))
        return kept

    def _find_line_number(self, content: str, position: int) -> int:
        """Find the 1-based line number for a character position in content."""
        # Counting within bounds gives the same result as slicing first,
        # without copying the prefix of the document.
        return content.count('\n', 0, position) + 1

    def _get_line_content(self, content: str, line_number: int) -> str:
        """
        Get the content of a specific line.

        Args:
            content: Full text to index into
            line_number: 1-based line number

        Returns:
            The line with surrounding whitespace stripped, or an empty
            string when line_number is out of range
        """
        lines = content.split('\n')
        if 1 <= line_number <= len(lines):
            return lines[line_number - 1].strip()
        return ""

    def _is_commented(self, content: str, position: int) -> bool:
        """
        Check if a position is within a LaTeX comment.

        Only the text between the start of the current line and the given
        position is examined: the position is commented when an unescaped
        '%' occurs somewhere in that span.
        """
        # rfind returns -1 when there is no earlier newline, so +1 gives 0
        line_start = content.rfind('\n', 0, position) + 1
        return self._find_comment_start(content[line_start:position]) != -1

    def _create_result(
        self,
        passed: bool,
        severity: CheckSeverity,
        message: str,
        line_number: Optional[int] = None,
        line_content: Optional[str] = None,
        suggestion: Optional[str] = None,
        file_path: Optional[str] = None
    ) -> CheckResult:
        """
        Helper to create a CheckResult with this checker's name.

        Args:
            passed: Whether the check passed
            severity: Severity level of the result
            message: Description of the result
            line_number: Optional line number the result refers to
            line_content: Optional text of that line
            suggestion: Optional suggested fix
            file_path: Optional path of the file the result refers to

        Returns:
            A CheckResult whose checker_name is this checker's name
        """
        return CheckResult(
            checker_name=self.name,
            passed=passed,
            severity=severity,
            message=message,
            line_number=line_number,
            line_content=line_content,
            suggestion=suggestion,
            file_path=file_path
        )