Spaces:

thinkwee
/

BibGuard

Running

thinkwee

init

46df5f0 7 days ago

6.3 kB

	"""
	Base checker class for paper submission quality checks.

	All specific checkers inherit from BaseChecker and implement
	the check() method to validate specific aspects of the TeX document.
	"""
	import re
	from abc import ABC, abstractmethod
	from dataclasses import dataclass
	from enum import Enum
	from typing import List, Optional, Tuple


	class CheckSeverity(Enum):
	    """How serious a reported check finding is.

	    Each member's value is the lowercase string form of its name.
	    """

	    # Must fix before submission
	    ERROR = "error"
	    # Strongly recommended to fix
	    WARNING = "warning"
	    # Suggestion or best practice
	    INFO = "info"


	@dataclass
	class CheckResult:
	    """One finding (or pass) reported by a checker.

	    The four leading fields are required; the location and suggestion
	    fields are optional and default to ``None``.
	    """

	    checker_name: str
	    passed: bool
	    severity: CheckSeverity
	    message: str
	    line_number: Optional[int] = None
	    line_content: Optional[str] = None
	    suggestion: Optional[str] = None
	    file_path: Optional[str] = None

	    def to_dict(self) -> dict:
	        """Return the result as a plain dict.

	        Some keys are shortened relative to the attribute names
	        (``checker``, ``line``, ``content``), and ``severity`` is
	        emitted as the enum member's ``value`` rather than the member.
	        """
	        # Ordered (key, value) pairs; dict() keeps this insertion order.
	        pairs = (
	            ('checker', self.checker_name),
	            ('passed', self.passed),
	            ('severity', self.severity.value),
	            ('message', self.message),
	            ('line', self.line_number),
	            ('content', self.line_content),
	            ('suggestion', self.suggestion),
	            ('file_path', self.file_path),
	        )
	        return dict(pairs)


	class BaseChecker(ABC):
	    """
	    Abstract base class for all paper submission checkers.

	    Each checker validates a specific aspect of the paper,
	    such as caption placement, reference integrity, or formatting.
	    Subclasses override the metadata attributes and implement check();
	    the underscore-prefixed helpers provide shared LaTeX comment
	    handling, line lookup, and result construction.
	    """

	    # Checker metadata - override in subclasses
	    name: str = "base"
	    display_name: str = "Base Checker"
	    description: str = "Base checker class"

	    @abstractmethod
	    def check(
	        self,
	        tex_content: str,
	        config: Optional[dict] = None
	    ) -> List[CheckResult]:
	        """
	        Run the check on the given TeX content.

	        Args:
	            tex_content: The full content of the TeX file
	            config: Optional configuration dict (e.g., conference-specific settings)

	        Returns:
	            List of CheckResult objects describing found issues
	        """
	        pass

	    @staticmethod
	    def _find_comment_start(line: str) -> int:
	        """
	        Locate the first unescaped percent sign in a single line.

	        A percent sign counts as escaped when it is immediately preceded
	        by an odd number of backslashes.

	        Args:
	            line: One line of text (no newline handling is performed).

	        Returns:
	            Index of the first unescaped percent sign, or -1 if none.
	        """
	        # True while the run of backslashes directly before the current
	        # character has odd length; tracked in one left-to-right pass
	        # instead of re-counting backwards at every percent sign.
	        escaped = False
	        for index, char in enumerate(line):
	            if char == '\\':
	                escaped = not escaped
	            elif char == '%' and not escaped:
	                return index
	            else:
	                escaped = False
	        return -1

	    def _remove_comments(self, content: str) -> str:
	        """
	        Remove all LaTeX comments from content.

	        Preserves line structure: the text is split on newlines, each
	        line is truncated at its first unescaped percent sign, and the
	        lines are re-joined, so line numbers stay aligned with the input.
	        Backslash-escaped percent signs are kept.
	        """
	        return '\n'.join(
	            self._remove_line_comment(line) for line in content.split('\n')
	        )

	    def _remove_line_comment(self, line: str) -> str:
	        """Remove comment from a single line, preserving content before %."""
	        cut = self._find_comment_start(line)
	        return line if cut < 0 else line[:cut]

	    def _is_comment_line(self, line: str) -> bool:
	        """Check if a line is entirely a comment (starts with %).

	        Leading whitespace is ignored; empty or whitespace-only lines
	        are not comment lines.
	        """
	        return line.lstrip().startswith('%')

	    def _get_non_comment_lines(self, content: str) -> List[Tuple[int, str]]:
	        """
	        Get all non-comment lines with their line numbers.

	        Returns:
	            List of (line_number, line_content) tuples for non-comment
	            lines. Line numbers are 1-based positions in the original
	            content. Line content has inline comments removed; lines
	            that are blank after comment removal are omitted.
	        """
	        kept: List[Tuple[int, str]] = []

	        for line_num, line in enumerate(content.split('\n'), 1):
	            # Skip pure comment lines
	            if self._is_comment_line(line):
	                continue

	            # Remove inline comments, then drop the line if nothing remains
	            cleaned = self._remove_line_comment(line)
	            if cleaned.strip():
	                kept.append((line_num, cleaned))

	        return kept

	    def _find_line_number(self, content: str, position: int) -> int:
	        """Find the 1-based line number for a character position in content."""
	        # Bounded count: same result as counting in content[:position]
	        # but without building the prefix copy.
	        return content.count('\n', 0, position) + 1

	    def _get_line_content(self, content: str, line_number: int) -> str:
	        """Get the stripped content of a specific 1-based line.

	        Returns an empty string when line_number is out of range.
	        """
	        lines = content.split('\n')
	        if 1 <= line_number <= len(lines):
	            return lines[line_number - 1].strip()
	        return ""

	    def _is_commented(self, content: str, position: int) -> bool:
	        """Check if a position is within a LaTeX comment.

	        Only the text between the start of the position's line and the
	        position itself is inspected, so a percent sign located exactly
	        at position does not count.
	        """
	        # rfind returns -1 when there is no earlier newline, which makes
	        # line_start 0 (the beginning of the content).
	        line_start = content.rfind('\n', 0, position) + 1
	        return self._find_comment_start(content[line_start:position]) >= 0

	    def _create_result(
	        self,
	        passed: bool,
	        severity: CheckSeverity,
	        message: str,
	        line_number: Optional[int] = None,
	        line_content: Optional[str] = None,
	        suggestion: Optional[str] = None,
	        file_path: Optional[str] = None
	    ) -> CheckResult:
	        """Helper to create a CheckResult with this checker's name.

	        Args:
	            passed: Whether the check passed.
	            severity: Severity to attach to the result.
	            message: Description of the finding.
	            line_number: Optional line the finding refers to.
	            line_content: Optional text of that line.
	            suggestion: Optional hint on how to fix the finding.
	            file_path: Optional path of the file the finding refers to;
	                defaults to None, matching the previous behaviour.

	        Returns:
	            A CheckResult whose checker_name is this checker's ``name``.
	        """
	        return CheckResult(
	            checker_name=self.name,
	            passed=passed,
	            severity=severity,
	            message=message,
	            line_number=line_number,
	            line_content=line_content,
	            suggestion=suggestion,
	            file_path=file_path
	        )