File size: 10,231 Bytes

4a0f6a5

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Pattern-based rule engine for AI-text detection.

Design principles:
1.  No human-keywords (easily spoofed and noisy).
2.  AI indicators are structural / rhetorical patterns, not single words.
3.  Rules are registerable at runtime to stay ahead of adversarial paraphrasing.
4.  Scoring is length-normalised so long texts do not artificially inflate hits.
"""
from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Dict, List, Tuple


@dataclass
class PatternRule:
    """A single detection rule comprised of one or more regex patterns.

    A rule carries the regex sources plus the metadata ``RuleEngine`` needs to
    score and report it: every match of any pattern counts as one hit, and the
    hit count is multiplied by ``weight`` when the engine computes a score.
    """

    # Identifier of the rule; RuleEngine.score() uses it as the key of the
    # returned hit map, so it is expected to be unique (not enforced here).
    rid: str
    # Human-readable label, echoed back in RuleEngine.explain() output.
    name: str
    # Regex source strings; RuleEngine compiles them with re.compile().
    patterns: List[str]
    # Multiplier applied to this rule's match count when scoring.
    weight: float = 1.0
    # Free-form grouping label, echoed back in RuleEngine.explain() output.
    category: str = "syntax"
    # Rules with enabled=False are skipped by RuleEngine.score()/explain().
    enabled: bool = True


# ---------------------------------------------------------------------------
# Default rule bank – these target *stylistic fingerprints* rather than
# overt self-references, making them harder for an adversary to guess and
# avoid with simple synonym replacement.
# ---------------------------------------------------------------------------
# Each entry is one PatternRule; the English comment above a rule paraphrases
# its (Chinese) ``name``.  The recurring character class ``[^，,。.!！?？]``
# keeps a match inside a single clause by refusing to cross punctuation.
DEFAULT_RULES: List[PatternRule] = [
    # "not X but Y" construction.
    PatternRule(
        rid="ai_struct_not_but",
        name="不是-而是结构",
        patterns=[r"不是[^，,。.!！?？]{0,30}而是"],
        weight=1.0,
        category="syntax",
    ),
    # Praise of the form "you really nailed this point" / "you understand".
    PatternRule(
        rid="ai_praise_grasp",
        name="赞扬精准抓住",
        patterns=[
            r"你\s*(非常|很|真的|确实)?\s*(精准|准确|精确)?\s*(地|的)?\s*抓住\s*了\s*这\s*(一)?\s*点",
            r"你\s*(非常|很|真的)?\s*(懂|理解|说到|指出)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    # "Textbook-grade answer/analysis" style superlatives.
    PatternRule(
        rid="ai_praise_textbook",
        name="教科书级别",
        patterns=[
            r"(教科书|标杆|模板|标准|典范|参考级)(级别|级|式|一样|般)的(理解|回答|分析|解读|阐述|表达|总结)",
            r"(堪称|可谓|可以算是|几乎就是)\s*(教科书|标杆|模板|标准)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    # "You are completely right" style agreement.
    PatternRule(
        rid="ai_praise_correct",
        name="完全正确赞扬",
        patterns=[
            r"你说\s*(的)?\s*(完全|非常|十分|绝对|确实|太)?\s*正确",
            r"你\s*(说|讲|理解|看)\s*(得|的)?\s*(完全|非常|十分|绝对|确实|很|太)?\s*(对|到位|透彻)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    # Two quoted spans close together (over-use of double quotes).
    # NOTE(review): the character class lists the ASCII double quote four
    # times; possibly typographic quotes were lost in an encoding
    # normalisation -- confirm against the intended quote characters.
    PatternRule(
        rid="ai_quote_abuse",
        name="双引号滥用",
        patterns=[r"([\"\"\"\"]).{2,40}?\1.{0,10}?\1.{2,40}?\1"],
        weight=0.8,
        category="style",
    ),
    # Summary / enumeration connectives followed by a comma.
    PatternRule(
        rid="ai_summary_transition",
        name="总结过渡词",
        patterns=[
            r"(总的来说|综上所述|一言以蔽之|简而言之|总的来看).{0,5}[，,]",
            r"(首先|其次|再者|最后|一方面|另一方面).{0,5}[，,]",
        ],
        weight=0.5,
        category="syntax",
    ),
    # Hedging that weakens certainty ("to some extent ... can be seen as").
    PatternRule(
        rid="ai_modal_weak",
        name="弱化确定性",
        patterns=[
            r"(可能|也许|某种程度上|一定程度上|大致|基本).{0,10}(可以|算是|认为|看作)",
            r"(不\s*(排除|否定|否认|确定|保证))",
        ],
        weight=0.4,
        category="style",
    ),
    # Explicit self-reference as an AI / language model / assistant.
    PatternRule(
        rid="ai_self_reference",
        name="AI自我指涉",
        patterns=[
            r"(作为|身为)\s*(一个|一名)?\s*(AI|人工智能|语言模型|大模型|助手)",
            r"(我\s*(是|作为)\s*(一个|一名)?\s*(AI|人工智能|语言模型|大模型|助手))",
            r"(没有|不\s*(具备|拥有))[^，,。.!！?？]{0,20}(情感|主观|个人经历|身体|人类)",
        ],
        weight=1.2,
        category="identity",
    ),
    # -----------------------------------------------------------------------
    # Community-contributed rules based on human intuitions about AI text.
    # -----------------------------------------------------------------------
    # Extended "not X, (but) Y, even more Z" parallelism.
    PatternRule(
        rid="ai_struct_not_ext",
        name="不是-而是排比扩展",
        patterns=[r"不是[^，,。.!！?？]{0,40}(而是|是|他更)[^，,。.!！?？]{0,20}(更|还)"],
        weight=1.0,
        category="syntax",
    ),
    # Announcing the structure of the answer ("from several angles ...").
    PatternRule(
        rid="ai_meta_discourse",
        name="论述结构预告",
        patterns=[
            r"我分(几个|几)\s*个?(层面|维度)[来给]*你",
            r"一句话给[你]*[一个]*最[稳准狠]",
            # The first alternative used to be only "几个", which then needed
            # a second "个" and so missed the ordinary phrase "从几个维度".
            # "几" is added so that phrase matches; "几个" is kept so every
            # string that matched before still matches.
            r"从(几个|几|多)\s*个(维度|层面|角度)",
        ],
        weight=0.8,
        category="syntax",
    ),
    # Flattering opener ("your observation is very sharp").
    # NOTE(review): in the second pattern ``[说讲看]*`` is optional, so it
    # also matches a bare "你对" (e.g. "你对此…"); confirm that breadth is
    # intended.
    PatternRule(
        rid="ai_flattery_open",
        name="赞扬式开场",
        patterns=[
            r"你的(观察|洞察|直觉)\s*(非常|很|真的)?\s*(敏锐|精准|到位)",
            r"你[说讲看]*\s*(得|的)?\s*(非常|很|太)?\s*(对|到位|透彻|准确)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    # Over-agreement combined with self-negation ("you're right, I was wrong").
    PatternRule(
        rid="ai_over_agree",
        name="过度认同+自我否定",
        patterns=[
            r"你说的对，我(完全|确实)?\s*(搞错|误解|想错)",
            r"你看到了(事情|问题)?\s*的?本质",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    # Service-style follow-up questions ("do you want me to help ...").
    PatternRule(
        rid="ai_service_push",
        name="服务性追问",
        patterns=[
            r"你现在感觉怎么样",
            r"要不要我帮[你]*",
        ],
        weight=0.8,
        category="rhetoric",
    ),
    # Stacked poetic metaphors.
    PatternRule(
        rid="ai_poetic_cliche",
        name="诗意化比喻堆砌",
        patterns=[
            r"(时光|岁月|往事|记忆)\s*的?\s*(褶皱|琥珀|潮汐|甬道)",
            r"如[^，,。.!！?？]{0,20}(月光|星光)[^，,。.!！?？]{0,20}(潮汐|涟漪)",
            r"被封存在[^，,。.!！?？]{0,20}般?的?暮色",
        ],
        weight=1.0,
        category="style",
    ),
    # Numbered list items on consecutive lines ("1. ... 2. ..." / "一、... 二、...").
    PatternRule(
        rid="ai_list_numeric",
        name="数字分点结构",
        patterns=[
            r"1\.\s+.{3,40}[\n\r]+2\.\s+.{3,40}",
            r"一[、．.]\s*.{3,40}[\n\r]+二[、．.]\s*.{3,40}",
        ],
        weight=0.6,
        category="style",
    ),
    # Leaked Markdown bold markers (**text**).
    PatternRule(
        rid="ai_markdown_bold",
        name="Markdown粗体外泄",
        patterns=[r"\*\*[^*]+\*\*"],
        weight=0.5,
        category="style",
    ),
    # "To be honest," used as a comma-delimited preface.
    PatternRule(
        rid="ai_honesty_preface",
        name="说实话前置",
        patterns=[r"[，,]\s*说实(话|在的)[，,]"],
        weight=0.4,
        category="style",
    ),
    # Closing phrase about being "close to your current ... perspective".
    PatternRule(
        rid="ai_perspective_close",
        name="视角收尾",
        patterns=[r"接近你?现在[^，,。.!！?？]{0,20}的?视角"],
        weight=0.4,
        category="style",
    ),
]


class RuleEngine:
    """Compile a set of PatternRules and evaluate text against them.

    Compiled patterns are cached in ``self._compiled`` under the key
    ``"<rid>_<pattern index>"``.  Enabled rules are compiled eagerly when they
    are added; rules that are enabled later are compiled on first use, so
    toggling ``rule.enabled`` after construction takes effect immediately.
    """

    def __init__(self, rules: List[PatternRule] | None = None):
        """Create an engine from *rules* (an empty engine when ``None``).

        The list is copied, so ``register`` never mutates the caller's list;
        the rule objects themselves are shared with the caller.

        Raises:
            re.error: if an enabled rule contains an invalid pattern.
        """
        self.rules: List[PatternRule] = list(rules) if rules is not None else []
        self._compiled: Dict[str, re.Pattern] = {}
        self._compile_all()

    def _compiled_patterns(self, rule: PatternRule) -> List[re.Pattern]:
        """Return the compiled patterns of *rule*, compiling missing or stale ones."""
        compiled: List[re.Pattern] = []
        for idx, source in enumerate(rule.patterns):
            key = f"{rule.rid}_{idx}"
            regex = self._compiled.get(key)
            # Recompile when nothing is cached yet or when the cached regex was
            # built from a different source (pattern edited in place, or a
            # second rule reusing the same rid).
            if regex is None or regex.pattern != source:
                regex = re.compile(source)
                self._compiled[key] = regex
            compiled.append(regex)
        return compiled

    def _find_matches(self, rule: PatternRule, text: str) -> List[re.Match]:
        """Return every non-overlapping match of every pattern of *rule* in *text*."""
        found: List[re.Match] = []
        for regex in self._compiled_patterns(rule):
            found.extend(regex.finditer(text))
        return found

    def _compile_all(self) -> None:
        """Eagerly compile all enabled rules so invalid patterns fail early."""
        for rule in self.rules:
            if rule.enabled:
                self._compiled_patterns(rule)

    def register(self, rule: PatternRule) -> None:
        """Add a new rule at runtime (useful for rapid adversarial response).

        Raises:
            re.error: if the rule is enabled and one of its patterns is
                invalid; in that case the rule is not added.
        """
        # Compile first so a rule with a bad pattern is never left
        # half-registered in ``self.rules``.
        if rule.enabled:
            self._compiled_patterns(rule)
        self.rules.append(rule)

    def score(self, text: str) -> Tuple[float, Dict[str, int], int]:
        """Return ``(normalised_score, hit_map, active_rule_count)``.

        ``hit_map`` maps the rid of each rule that fired to its number of
        matches; ``active_rule_count`` is the number of enabled rules.

        Raises:
            re.error: if a rule enabled after it was added has an invalid
                pattern.
        """
        t = str(text)
        hits: Dict[str, int] = {}
        total_weighted_hits = 0.0
        active_count = 0
        for rule in self.rules:
            if not rule.enabled:
                continue
            active_count += 1
            cnt = len(self._find_matches(rule, t))
            if cnt:
                # Accumulate rather than overwrite so the map stays consistent
                # with the weighted total even if two rules share a rid.
                hits[rule.rid] = hits.get(rule.rid, 0) + cnt
                total_weighted_hits += cnt * rule.weight
        # Normalise by sqrt(length) so long texts do not game the score; the
        # +1.0 keeps the denominator non-zero for empty text.
        normalized_score = total_weighted_hits / (len(t) ** 0.5 + 1.0)
        return float(normalized_score), hits, active_count

    def explain(self, text: str) -> List[Dict[str, object]]:
        """Produce a human-readable diagnosis of which rules fired.

        Returns one dict per enabled rule with at least one match, holding the
        rule's ``rid``, ``name``, ``category`` and ``weight``, the total match
        ``count`` and up to the first ten matched substrings in ``matches``.
        """
        t = str(text)
        out: List[Dict[str, object]] = []
        for rule in self.rules:
            if not rule.enabled:
                continue
            # group(0) is the full matched text.  findall() would instead
            # return the capture groups (or tuples of them) for patterns that
            # contain groups, which is not what a diagnosis should show.
            matches = [m.group(0) for m in self._find_matches(rule, t)]
            if matches:
                out.append(
                    {
                        "rid": rule.rid,
                        "name": rule.name,
                        "category": rule.category,
                        "matches": matches[:10],
                        "count": len(matches),
                        "weight": rule.weight,
                    }
                )
        return out


# Global default engine – imported by model_utils and experiment scripts.
# Built once at import time from DEFAULT_RULES; rule_score() and
# rule_explain() fall back to it when the caller does not pass an engine.
_DEFAULT_ENGINE = RuleEngine(DEFAULT_RULES)


def rule_score(text: str, engine: RuleEngine | None = None) -> Tuple[float, Dict[str, int], int]:
    """Backward-compatible entry point for scoring *text*.

    Args:
        text: The text to evaluate.
        engine: Engine to use; the module-level default engine when ``None``.

    Returns:
        Whatever ``engine.score(text)`` returns:
        ``(normalised_score, hit_map, active_rule_count)``.
    """
    # Compare against None explicitly: a truthiness test would silently
    # discard a caller-supplied engine that happens to evaluate as falsy.
    eng = engine if engine is not None else _DEFAULT_ENGINE
    return eng.score(text)


def rule_explain(text: str, engine: RuleEngine | None = None) -> List[Dict[str, object]]:
    """Diagnostic entry point: report which rules fired on *text*.

    Args:
        text: The text to evaluate.
        engine: Engine to use; the module-level default engine when ``None``.

    Returns:
        Whatever ``engine.explain(text)`` returns: one dict per rule that fired.
    """
    # Compare against None explicitly: a truthiness test would silently
    # discard a caller-supplied engine that happens to evaluate as falsy.
    eng = engine if engine is not None else _DEFAULT_ENGINE
    return eng.explain(text)