Spaces:

evalstate
/

diffusers-pr-api

Sleeping

File size: 7,803 Bytes

dbf7313

from __future__ import annotations

import re
from collections import defaultdict
from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from typing import Any

# Matches an HTML comment (`<!-- ... -->`), non-greedily and across newlines.
HTML_COMMENT_PATTERN = re.compile(r"<!--.*?-->", re.DOTALL)
# Defaults used by build_template_cleanup_settings() when the caller does not
# override them.
DEFAULT_TEMPLATE_CLEANUP_MODE = "merge_defaults"
DEFAULT_STRIP_HTML_COMMENTS = True
DEFAULT_TRIM_CLOSING_REFERENCE_PREFIX = True
# Markdown headings (1-6 `#`) that open a boilerplate section. In
# strip_pull_request_template() a match drops the heading and every following
# line up to the next line that starts with "#".
DEFAULT_TEMPLATE_SECTION_PATTERNS = (
    r"^#{1,6}\s*code agent policy\s*$",
    r"^#{1,6}\s*before submitting\s*$",
    r"^#{1,6}\s*who can review\?\s*$",
)
# Single lines that are dropped on their own: the "What does this PR do?"
# heading and an unfilled "Fixes # (issue)"-style placeholder.
DEFAULT_TEMPLATE_LINE_PATTERNS = (
    r"^#{1,6}\s*what does this pr do\?\s*$",
    r"^(?:fix(?:e[sd])?|close[sd]?|resolve[sd]?)\s*#\s*\(?issue\)?\s*$",
)
# Matches a line that begins with a closing keyword (fix/fixes/fixed,
# close/closes/closed, resolve/resolves/resolved) followed by whitespace and
# one or more issue references (`#123` or `owner/repo#123`, joined by "," or
# "and"). An optional separator (colon, hyphen, en dash or em dash) may follow;
# whatever remains on the line is captured in the `rest` group.
PR_TEMPLATE_CLOSING_REFERENCE_PREFIX_PATTERN = re.compile(
    r"""
    ^
    (?P<prefix>\s*(?:fix(?:e[sd])?|close[sd]?|resolve[sd]?)\s+)
    (?:
        (?:[a-z0-9_.-]+/[a-z0-9_.-]+)?\#\s*\d+
        (?:\s*(?:,|and)\s*(?:[a-z0-9_.-]+/[a-z0-9_.-]+)?\#\s*\d+)*
    )
    \s*(?:[:\-\u2013\u2014]\s*)?
    (?P<rest>.*)
    $
    """,
    re.IGNORECASE | re.VERBOSE,
)


def compile_casefold_patterns(patterns: Sequence[str]) -> tuple[re.Pattern[str], ...]:
    """Compile regex sources into case-insensitive patterns.

    Args:
        patterns: Regular-expression source strings. Entries that are empty
            or contain only whitespace are ignored.

    Returns:
        A tuple of patterns compiled with ``re.IGNORECASE``, in input order.

    Raises:
        re.error: If any non-blank source is not a valid regular expression.
    """
    compiled: list[re.Pattern[str]] = []
    for source in patterns:
        if not source.strip():
            # Blank entries would compile to a match-everything pattern.
            continue
        compiled.append(re.compile(source, re.IGNORECASE))
    return tuple(compiled)


@dataclass(slots=True, frozen=True)
class TemplateCleanupSettings:
    """Immutable options controlling how a pull-request body is cleaned.

    Instances are consumed by ``strip_pull_request_template`` and are normally
    created through ``build_template_cleanup_settings``.
    """

    # When true, HTML comments are removed from the body before line processing.
    strip_html_comments: bool
    # When true, a leading "Fixes #123"-style prefix is trimmed from lines.
    trim_closing_reference_prefix: bool
    # A line matching any of these starts a section that is dropped entirely.
    section_patterns: tuple[re.Pattern[str], ...]
    # A line matching any of these is dropped on its own.
    line_patterns: tuple[re.Pattern[str], ...]


def build_template_cleanup_settings(
    *,
    mode: str = DEFAULT_TEMPLATE_CLEANUP_MODE,
    strip_html_comments: bool = DEFAULT_STRIP_HTML_COMMENTS,
    trim_closing_reference_prefix: bool = DEFAULT_TRIM_CLOSING_REFERENCE_PREFIX,
    section_patterns: Sequence[str] = (),
    line_patterns: Sequence[str] = (),
) -> TemplateCleanupSettings:
    """Build the cleanup settings for a given mode.

    Args:
        mode: ``"off"`` disables every cleanup step and ignores the other
            arguments; ``"merge_defaults"`` puts the built-in patterns ahead
            of the supplied ones; ``"replace_defaults"`` uses only the
            supplied patterns.
        strip_html_comments: Whether HTML comments are removed.
        trim_closing_reference_prefix: Whether closing-reference prefixes
            are trimmed from lines.
        section_patterns: Extra regex sources for section headings.
        line_patterns: Extra regex sources for single lines.

    Returns:
        A ``TemplateCleanupSettings`` with compiled, case-insensitive patterns.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values.
    """
    if mode not in ("off", "merge_defaults", "replace_defaults"):
        raise ValueError(f"Unknown PR template cleanup mode: {mode}")

    if mode == "off":
        # Everything disabled; caller-supplied flags and patterns are ignored.
        return TemplateCleanupSettings(
            strip_html_comments=False,
            trim_closing_reference_prefix=False,
            section_patterns=(),
            line_patterns=(),
        )

    section_sources = tuple(section_patterns)
    line_sources = tuple(line_patterns)
    if mode == "merge_defaults":
        # Built-in patterns come first, followed by the caller's additions.
        section_sources = (*DEFAULT_TEMPLATE_SECTION_PATTERNS, *section_sources)
        line_sources = (*DEFAULT_TEMPLATE_LINE_PATTERNS, *line_sources)

    compiled_sections = compile_casefold_patterns(section_sources)
    compiled_lines = compile_casefold_patterns(line_sources)
    return TemplateCleanupSettings(
        strip_html_comments=strip_html_comments,
        trim_closing_reference_prefix=trim_closing_reference_prefix,
        section_patterns=compiled_sections,
        line_patterns=compiled_lines,
    )


def strip_pull_request_template(
    body: str | None,
    *,
    settings: TemplateCleanupSettings | None = None,
) -> str:
    """Remove pull-request template boilerplate from a PR body.

    Line endings are normalised to ``\\n`` first. Depending on the settings,
    HTML comments are removed, lines matching a line pattern are dropped,
    sections opened by a section pattern are dropped up to the next line
    starting with ``#``, and closing-reference prefixes are trimmed. The
    surviving lines are passed through ``collapse_blank_lines``.

    Args:
        body: Raw PR body; ``None`` or an empty string yields ``""``.
        settings: Cleanup options. When omitted, the result of
            ``build_template_cleanup_settings()`` with no arguments is used.

    Returns:
        The cleaned body text.
    """
    if not body:
        return ""
    text = body.replace("\r\n", "\n").replace("\r", "\n")

    cleanup = settings if settings else build_template_cleanup_settings()
    if cleanup.strip_html_comments:
        # Substitute a newline so text on either side of a comment stays apart.
        text = HTML_COMMENT_PATTERN.sub("\n", text)

    def matches_any(patterns: tuple[re.Pattern[str], ...], candidate: str) -> bool:
        for matcher in patterns:
            if matcher.match(candidate):
                return True
        return False

    kept: list[str] = []
    inside_template_section = False
    for source_line in text.splitlines():
        right_trimmed = source_line.rstrip()
        bare = right_trimmed.strip()

        if matches_any(cleanup.line_patterns, bare):
            continue
        if matches_any(cleanup.section_patterns, bare):
            inside_template_section = True
            continue
        # A dropped section runs until the next line that starts with "#".
        if inside_template_section and not bare.startswith("#"):
            continue
        inside_template_section = False

        remainder = (
            _trim_closing_reference_prefix(bare)
            if cleanup.trim_closing_reference_prefix
            else None
        )
        if remainder is None:
            # Not a closing-reference line (or trimming is off): keep as is.
            kept.append(right_trimmed)
        elif remainder:
            kept.append(remainder)
        # An empty remainder means the line held only the reference: drop it.

    return collapse_blank_lines(kept)


def collapse_blank_lines(lines: list[str]) -> str:
    """Join lines, stripping each one and squeezing runs of blank lines.

    Leading and trailing blank lines are removed, and consecutive blank
    lines between text are reduced to a single empty line.

    Args:
        lines: Lines to join; each is stripped of surrounding whitespace.

    Returns:
        The surviving lines joined with ``"\\n"``.
    """
    output: list[str] = []
    for entry in lines:
        content = entry.strip()
        if content:
            output.append(content)
        elif output and output[-1]:
            # First blank after text; further blanks in the run are ignored.
            output.append("")
    # At most one blank line can trail the output at this point.
    if output and not output[-1]:
        output.pop()
    return "\n".join(output)


def _trim_closing_reference_prefix(line: str) -> str | None:
    """Strip a leading closing-reference prefix from ``line``.

    Returns:
        ``None`` when the line does not start with a closing reference;
        otherwise the stripped text after the reference, which is ``""``
        when nothing follows it.
    """
    found = PR_TEMPLATE_CLOSING_REFERENCE_PREFIX_PATTERN.match(line)
    return None if found is None else found.group("rest").strip()


@dataclass(slots=True, frozen=True)
class ClusterSuppressionRule:
    id: str
    title_patterns: tuple[re.Pattern[str], ...] = ()
    body_patterns: tuple[re.Pattern[str], ...] = ()
    path_patterns: tuple[re.Pattern[str], ...] = ()

    def matches(self, *, title: str, body: str, paths: Sequence[str]) -> bool:
        if not (self.title_patterns or self.body_patterns or self.path_patterns):
            return False
        if self.title_patterns and not any(
            pattern.search(title) for pattern in self.title_patterns
        ):
            return False
        if self.body_patterns and not any(pattern.search(body) for pattern in self.body_patterns):
            return False
        return not self.path_patterns or any(
            pattern.search(path) for pattern in self.path_patterns for path in paths
        )


def compile_cluster_suppression_rules(
    payload: Sequence[Mapping[str, Any]],
) -> tuple[ClusterSuppressionRule, ...]:
    """Turn raw rule mappings into compiled ``ClusterSuppressionRule`` objects.

    The rule id is taken from the ``"id"`` key, then ``"name"``, and finally
    falls back to ``"rule-<position>"`` (1-based), which is also used if the
    chosen value is blank after stripping.

    Args:
        payload: Mappings that may carry ``id``/``name`` plus the list-valued
            keys ``title_patterns``, ``body_patterns`` and ``path_patterns``.

    Returns:
        One compiled rule per payload entry, in the same order.
    """

    def patterns_for(entry: Mapping[str, Any], key: str) -> tuple[re.Pattern[str], ...]:
        return compile_casefold_patterns(_string_list(entry.get(key)))

    compiled: list[ClusterSuppressionRule] = []
    for position, entry in enumerate(payload, start=1):
        fallback_id = f"rule-{position}"
        chosen = entry.get("id") or entry.get("name") or fallback_id
        identifier = str(chosen).strip() or fallback_id
        compiled.append(
            ClusterSuppressionRule(
                id=identifier,
                title_patterns=patterns_for(entry, "title_patterns"),
                body_patterns=patterns_for(entry, "body_patterns"),
                path_patterns=patterns_for(entry, "path_patterns"),
            )
        )
    return tuple(compiled)


def suppressed_pull_request_reasons(
    pull_requests: Sequence[Mapping[str, Any]],
    pr_files: Sequence[Mapping[str, Any]],
    rules: Sequence[ClusterSuppressionRule],
) -> dict[int, list[str]]:
    """Map each suppressed pull-request number to the ids of matching rules.

    Args:
        pull_requests: Rows read via the ``number``, ``title`` and ``body``
            keys; rows without a number are ignored.
        pr_files: Rows read via the ``pull_request_number`` and ``filename``
            keys; rows missing either value are ignored.
        rules: Suppression rules to evaluate against every pull request.

    Returns:
        A dict keyed by PR number, containing only pull requests matched by
        at least one rule; values list the matching rule ids in rule order.
    """
    if not rules:
        return {}

    # Group the changed file names under their pull-request number.
    files_index: defaultdict[int, list[str]] = defaultdict(list)
    for file_row in pr_files:
        owner = file_row.get("pull_request_number")
        path = str(file_row.get("filename") or "").strip()
        if owner is not None and path:
            files_index[int(owner)].append(path)

    result: dict[int, list[str]] = {}
    for pr_row in pull_requests:
        raw_number = pr_row.get("number")
        if raw_number is None:
            continue
        key = int(raw_number)
        title_text = str(pr_row.get("title") or "")
        body_text = str(pr_row.get("body") or "")
        # .get() avoids creating an empty entry in the defaultdict.
        changed_paths = files_index.get(key, [])
        hits = [
            rule.id
            for rule in rules
            if rule.matches(title=title_text, body=body_text, paths=changed_paths)
        ]
        if hits:
            result[key] = hits
    return result


def _string_list(value: Any) -> tuple[str, ...]:
    """Coerce a list into a tuple of its non-blank string forms.

    Args:
        value: Any object; anything that is not a ``list`` yields ``()``.

    Returns:
        ``str()`` of each item whose text is not blank, in input order.
        Surrounding whitespace on kept items is preserved.
    """
    if isinstance(value, list):
        rendered = (str(element) for element in value)
        return tuple(text for text in rendered if text.strip())
    return ()