# File size: 4,140 Bytes (viewer header; id e5344c6)
from __future__ import annotations
import argparse
import re
import sys
import tokenize
from dataclasses import dataclass
from decimal import Decimal, InvalidOperation
from pathlib import Path
from typing import Iterator, List, Sequence
# File suffixes that collect_findings routes to scan_python_file.
SOURCE_EXTENSIONS = {".py"}
# File suffixes that collect_findings routes to scan_text_file.
TEXT_EXTENSIONS = {".json", ".yaml", ".yml", ".txt"}
# Path components that cause iter_target_files to skip a path.
SKIP_DIRS = {".git", ".venv", "venv", "__pycache__", ".mypy_cache", ".pytest_cache", ".ruff_cache"}
# Matches a decimal-looking number inside free text:
#   (?<![\w.])  - not preceded by a word character or a dot
#   [+-]?       - optional sign
#   one of: digits "." optional-digits | "." digits | digits with an
#           optional fraction followed by an exponent
#   (?![\w.])   - not followed by a word character or a dot
# Plain integers without a dot or exponent are not matched.
DECIMAL_PATTERN = re.compile(
r"(?<![\w.])[+-]?(?:\d+\.\d*|\.\d+|\d+(?:\.\d*)?[eE][+-]?\d+)(?![\w.])"
)
@dataclass(frozen=True)
class Finding:
    """Immutable record describing one reported location in a scanned file."""

    # File the record refers to.
    path: Path
    # Line number within the file (error records use line 1).
    line: int
    # Matched literal text, or a "<parse-error>" / "<read-error>" marker.
    token: str
    # String form of the parsed Decimal, or the error message for error records.
    value: str
def is_decimal_token(token: str) -> bool:
    """Return True when *token* is written with a decimal point or exponent.

    Hexadecimal integer literals are rejected explicitly: "e" is a valid hex
    digit, so a token such as ``0xE0`` would otherwise be mistaken for a
    number in exponent notation.
    """
    lowered = token.lower()
    if lowered.startswith("0x"):
        return False
    return "." in lowered or "e" in lowered
def parse_decimal(token: str) -> Decimal | None:
    """Convert *token* to a ``Decimal``, or return ``None`` if it cannot be parsed."""
    try:
        parsed = Decimal(token)
    except (InvalidOperation, ValueError):
        # Not something Decimal understands; signal that with None.
        parsed = None
    return parsed
def boundary_check(token: str) -> bool:
    """Return True when *token* parses to a Decimal equal to 0 or 1."""
    parsed = parse_decimal(token)
    if parsed is None:
        # Unparseable tokens can never sit on a boundary.
        return False
    return parsed in {Decimal(0), Decimal(1)}
def scan_python_file(path: Path) -> List[Finding]:
    """Tokenize a Python file and report NUMBER tokens equal to 0 or 1.

    Only tokens accepted by ``is_decimal_token`` are considered. If the file
    cannot be opened, decoded or tokenized, a single ``<parse-error>`` finding
    carrying the exception text is appended to whatever was collected before
    the failure, so one bad file does not abort the whole scan.

    Args:
        path: File to scan.

    Returns:
        Findings in the order the tokens were encountered.
    """
    findings: List[Finding] = []
    try:
        with tokenize.open(path) as handle:
            for tok_type, tok_str, start, _, _ in tokenize.generate_tokens(handle.readline):
                if tok_type != tokenize.NUMBER or not is_decimal_token(tok_str):
                    continue
                if boundary_check(tok_str):
                    value = parse_decimal(tok_str)
                    findings.append(Finding(path=path, line=start[0], token=tok_str, value=str(value)))
    # UnicodeDecodeError surfaces lazily from handle.readline when bytes past
    # the encoding declaration are invalid; it is not an OSError or SyntaxError.
    except (OSError, SyntaxError, UnicodeDecodeError, tokenize.TokenError) as exc:
        findings.append(Finding(path=path, line=1, token="<parse-error>", value=str(exc)))
    return findings
def scan_text_file(path: Path) -> List[Finding]:
    """Scan a text file line by line for decimal literals equal to 0 or 1.

    The file is read as UTF-8. In ``.yaml``/``.yml`` files, lines whose first
    non-blank character is ``#`` are skipped. If the file cannot be read or
    decoded, a single ``<read-error>`` finding carrying the exception text is
    returned instead of raising.

    Args:
        path: File to scan.

    Returns:
        Findings in file order.
    """
    findings: List[Finding] = []
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # Non-UTF-8 content raises UnicodeDecodeError, which is not an OSError.
        return [Finding(path=path, line=1, token="<read-error>", value=str(exc))]
    # The suffix does not change per line, so decide once.
    skip_comment_lines = path.suffix in {".yaml", ".yml"}
    for line_number, line in enumerate(text.splitlines(), start=1):
        if skip_comment_lines and line.lstrip().startswith("#"):
            continue
        for match in DECIMAL_PATTERN.finditer(line):
            token = match.group(0)
            if boundary_check(token):
                value = parse_decimal(token)
                findings.append(Finding(path=path, line=line_number, token=token, value=str(value)))
    return findings
def iter_target_files(root: Path) -> Iterator[Path]:
    """Yield every scannable file below *root*.

    A path is skipped when any component *below root* is listed in
    ``SKIP_DIRS``. Components of *root* itself are deliberately ignored, so a
    repository that happens to live under a directory named e.g. ``venv`` is
    still scanned.

    Args:
        root: Directory to walk recursively.

    Yields:
        Regular files whose suffix is in ``SOURCE_EXTENSIONS`` or
        ``TEXT_EXTENSIONS``.
    """
    for path in root.rglob("*"):
        # rglob results are always prefixed with root, so relative_to is safe.
        if any(part in SKIP_DIRS for part in path.relative_to(root).parts):
            continue
        if not path.is_file():
            continue
        if path.suffix in SOURCE_EXTENSIONS or path.suffix in TEXT_EXTENSIONS:
            yield path
def collect_findings(root: Path) -> List[Finding]:
    """Scan every target file under *root* in sorted path order and gather the findings."""
    collected: List[Finding] = []
    for target in sorted(iter_target_files(root)):
        # Python sources go through the tokenizer; everything else is scanned as text.
        scanner = scan_python_file if target.suffix in SOURCE_EXTENSIONS else scan_text_file
        collected += scanner(target)
    return collected
def format_findings(findings: Sequence[Finding], root: Path) -> str:
    """Render one ``path:line: boundary decimal token -> value`` line per finding.

    Paths are shown relative to *root*; the lines are joined with newlines and
    an empty sequence produces an empty string.
    """
    return "\n".join(
        f"{item.path.relative_to(root)}:{item.line}: boundary decimal {item.token} -> {item.value}"
        for item in findings
    )
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: scan a directory tree and report the result.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        ``1`` when any finding was reported (details go to stderr), else ``0``.

    Raises:
        SystemExit: Raised by argparse (status 2) for invalid arguments or
            when the given path is not an existing directory.
    """
    parser = argparse.ArgumentParser(description="Validate that decimal literals do not touch 0 or 1.")
    parser.add_argument("path", nargs="?", default=".", help="Repository path to scan")
    args = parser.parse_args(argv)
    root = Path(args.path).resolve()
    # Without this check a mistyped path is walked as an empty tree and the
    # validation would report success without having looked at anything.
    if not root.is_dir():
        parser.error(f"not a directory: {root}")
    findings = collect_findings(root)
    if findings:
        print("Task validation failed: boundary-touching decimals found.", file=sys.stderr)
        print(format_findings(findings, root), file=sys.stderr)
        return 1
    print("Task validation passed: no decimal literals touch 0 or 1.")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())