LUCIFerace's picture
Add files using upload-large-folder tool
4a0f6a5 verified
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Pattern-based rule engine for AI-text detection.
Design principles:
1. No human-keywords (easily spoofed and noisy).
2. AI indicators are structural / rhetorical patterns, not single words.
3. Rules are registerable at runtime to stay ahead of adversarial paraphrasing.
4. Scoring is length-normalised so long texts do not artificially inflate hits.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Dict, List, Tuple
@dataclass
class PatternRule:
    """A single detection rule comprised of one or more regex patterns.

    ``patterns`` holds regex source strings. A bare string passed as
    ``patterns`` is wrapped into a one-element list, because iterating a
    ``str`` would otherwise yield one "pattern" per character.
    """

    # Rule identifier; RuleEngine uses it as the hit-map key and as the prefix
    # of its compiled-pattern cache keys.
    rid: str
    # Human-readable label reported in diagnostics.
    name: str
    # Regex source strings; matches of every pattern are summed for the rule.
    patterns: List[str]
    # Multiplier applied to the rule's hit count when scoring.
    weight: float = 1.0
    # Free-form grouping label (the default bank uses "syntax", "rhetoric",
    # "style" and "identity").
    category: str = "syntax"
    # Disabled rules are skipped by RuleEngine.
    enabled: bool = True

    def __post_init__(self) -> None:
        # Guard against the classic str-is-iterable pitfall.
        if isinstance(self.patterns, str):
            self.patterns = [self.patterns]
# ---------------------------------------------------------------------------
# Default rule bank – these target *stylistic fingerprints* rather than
# overt self-references, making them harder for an adversary to guess and
# avoid with simple synonym replacement.
# ---------------------------------------------------------------------------
# NOTE: full-width punctuation (U+FF0C ",", U+FF01 "!", U+FF1F "?", U+FF0E ".",
# U+FF02 '"') and curly quotes (U+201C/U+201D) are written as \uXXXX escapes,
# which the ``re`` module interprets itself. Spelling them as escapes keeps the
# ASCII and full-width variants visibly distinct and immune to tools that
# normalise full-width characters to ASCII.
DEFAULT_RULES: List[PatternRule] = [
    PatternRule(
        rid="ai_struct_not_but",
        name="不是-而是结构",
        # "不是 … 而是" within one clause (no sentence/clause punctuation between).
        patterns=[r"不是[^,\uFF0C。.!\uFF01?\uFF1F]{0,30}而是"],
        weight=1.0,
        category="syntax",
    ),
    PatternRule(
        rid="ai_praise_grasp",
        name="赞扬精准抓住",
        patterns=[
            r"你\s*(非常|很|真的|确实)?\s*(精准|准确|精确)?\s*(地|的)?\s*抓住\s*了\s*这\s*(一)?\s*点",
            r"你\s*(非常|很|真的)?\s*(懂|理解|说到|指出)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_praise_textbook",
        name="教科书级别",
        patterns=[
            r"(教科书|标杆|模板|标准|典范|参考级)(级别|级|式|一样|般)的(理解|回答|分析|解读|阐述|表达|总结)",
            r"(堪称|可谓|可以算是|几乎就是)\s*(教科书|标杆|模板|标准)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_praise_correct",
        name="完全正确赞扬",
        patterns=[
            r"你说\s*(的)?\s*(完全|非常|十分|绝对|确实|太)?\s*正确",
            r"你\s*(说|讲|理解|看)\s*(得|的)?\s*(完全|非常|十分|绝对|确实|很|太)?\s*(对|到位|透彻)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_quote_abuse",
        name="双引号滥用",
        patterns=[
            # Two quoted phrases close together, using a symmetric quote mark
            # (ASCII or full-width); \1 forces the same mark throughout.
            r"([\"\uFF02]).{2,40}?\1.{0,10}?\1.{2,40}?\1",
            # Same shape for paired curly quotes, whose opener and closer
            # differ and therefore can never satisfy a \1 backreference.
            r"\u201C.{2,40}?\u201D.{0,10}?\u201C.{2,40}?\u201D",
        ],
        weight=0.8,
        category="style",
    ),
    PatternRule(
        rid="ai_summary_transition",
        name="总结过渡词",
        patterns=[
            r"(总的来说|综上所述|一言以蔽之|简而言之|总的来看).{0,5}[,\uFF0C]",
            r"(首先|其次|再者|最后|一方面|另一方面).{0,5}[,\uFF0C]",
        ],
        weight=0.5,
        category="syntax",
    ),
    PatternRule(
        rid="ai_modal_weak",
        name="弱化确定性",
        patterns=[
            r"(可能|也许|某种程度上|一定程度上|大致|基本).{0,10}(可以|算是|认为|看作)",
            r"(不\s*(排除|否定|否认|确定|保证))",
        ],
        weight=0.4,
        category="style",
    ),
    PatternRule(
        rid="ai_self_reference",
        name="AI自我指涉",
        patterns=[
            r"(作为|身为)\s*(一个|一名)?\s*(AI|人工智能|语言模型|大模型|助手)",
            r"(我\s*(是|作为)\s*(一个|一名)?\s*(AI|人工智能|语言模型|大模型|助手))",
            r"(没有|不\s*(具备|拥有))[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(情感|主观|个人经历|身体|人类)",
        ],
        weight=1.2,
        category="identity",
    ),
    # -----------------------------------------------------------------------
    # Community-contributed rules based on human intuitions about AI text.
    # -----------------------------------------------------------------------
    PatternRule(
        rid="ai_struct_not_ext",
        name="不是-而是排比扩展",
        patterns=[
            r"不是[^,\uFF0C。.!\uFF01?\uFF1F]{0,40}(而是|是|他更)[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(更|还)"
        ],
        weight=1.0,
        category="syntax",
    ),
    PatternRule(
        rid="ai_meta_discourse",
        name="论述结构预告",
        patterns=[
            r"我分(几个|几)\s*个?(层面|维度)[来给]*你",
            r"一句话给[你]*[一个]*最[稳准狠]",
            # The measure word is matched once, after the quantifier, so both
            # "从几个维度" and "从多个维度" match.
            r"从(几|多)\s*个(维度|层面|角度)",
        ],
        weight=0.8,
        category="syntax",
    ),
    PatternRule(
        rid="ai_flattery_open",
        name="赞扬式开场",
        patterns=[
            r"你的(观察|洞察|直觉)\s*(非常|很|真的)?\s*(敏锐|精准|到位)",
            # NOTE(review): "[说讲看]*" allows zero verbs, so a bare "你对" matches
            # (e.g. "你对此…"); confirm whether "+" was intended.
            r"你[说讲看]*\s*(得|的)?\s*(非常|很|太)?\s*(对|到位|透彻|准确)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_over_agree",
        name="过度认同+自我否定",
        patterns=[
            r"你说的对[,\uFF0C]我(完全|确实)?\s*(搞错|误解|想错)",
            r"你看到了(事情|问题)?\s*的?本质",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_service_push",
        name="服务性追问",
        patterns=[
            r"你现在感觉怎么样",
            r"要不要我帮[你]*",
        ],
        weight=0.8,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_poetic_cliche",
        name="诗意化比喻堆砌",
        patterns=[
            r"(时光|岁月|往事|记忆)\s*的?\s*(褶皱|琥珀|潮汐|甬道)",
            r"如[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(月光|星光)[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(潮汐|涟漪)",
            r"被封存在[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}般?的?暮色",
        ],
        weight=1.0,
        category="style",
    ),
    PatternRule(
        rid="ai_list_numeric",
        name="数字分点结构",
        patterns=[
            r"1\.\s+.{3,40}[\n\r]+2\.\s+.{3,40}",
            r"一[、.\uFF0E]\s*.{3,40}[\n\r]+二[、.\uFF0E]\s*.{3,40}",
        ],
        weight=0.6,
        category="style",
    ),
    PatternRule(
        rid="ai_markdown_bold",
        name="Markdown粗体外泄",
        patterns=[r"\*\*[^*]+\*\*"],
        weight=0.5,
        category="style",
    ),
    PatternRule(
        rid="ai_honesty_preface",
        name="说实话前置",
        patterns=[r"[,\uFF0C]\s*说实(话|在的)[,\uFF0C]"],
        weight=0.4,
        category="style",
    ),
    PatternRule(
        rid="ai_perspective_close",
        name="视角收尾",
        patterns=[r"接近你?现在[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}的?视角"],
        weight=0.4,
        category="style",
    ),
]
class RuleEngine:
    """Compiles and evaluates a set of PatternRules.

    Compiled patterns are cached in ``_compiled`` under ``"<rid>_<index>"``.
    Rules that are disabled when added are not compiled up front; if such a
    rule is enabled later, its patterns are compiled on first use.
    """

    def __init__(self, rules: List[PatternRule] | None = None):
        """Create an engine from *rules* (copied) or an empty engine if ``None``."""
        # Shallow copy so register() never mutates the caller's list.
        self.rules: List[PatternRule] = list(rules) if rules is not None else []
        self._compiled: Dict[str, re.Pattern] = {}
        self._compile_all()

    def _compile_rule(self, rule: PatternRule, *, refresh: bool = False) -> List[re.Pattern]:
        """Return *rule*'s compiled patterns, compiling those missing from the cache.

        With ``refresh=True`` every pattern is recompiled and its cache entry
        overwritten. Raises ``re.error`` if a pattern is not a valid regex.
        """
        compiled: List[re.Pattern] = []
        for idx, source in enumerate(rule.patterns):
            key = f"{rule.rid}_{idx}"
            if refresh or key not in self._compiled:
                self._compiled[key] = re.compile(source)
            compiled.append(self._compiled[key])
        return compiled

    def _compile_all(self) -> None:
        """(Re)compile the patterns of every currently enabled rule."""
        for r in self.rules:
            if r.enabled:
                self._compile_rule(r, refresh=True)

    def register(self, rule: PatternRule) -> None:
        """Add a new rule at runtime (useful for rapid adversarial response).

        An enabled rule is compiled immediately, overwriting any cached
        patterns stored under the same ``"<rid>_<index>"`` keys.
        """
        self.rules.append(rule)
        if rule.enabled:
            self._compile_rule(rule, refresh=True)

    def score(self, text: str) -> Tuple[float, Dict[str, int], int]:
        """Return (normalised_score, hit_map, active_rule_count).

        ``hit_map`` maps each rule id with at least one match to its total
        match count over all of the rule's patterns. The score is the
        weight-multiplied hit total divided by ``sqrt(len(text)) + 1``.
        """
        t = str(text)
        hits: Dict[str, int] = {}
        total_weighted_hits = 0.0
        active_count = 0
        for r in self.rules:
            if not r.enabled:
                continue
            active_count += 1
            # _compile_rule() compiles lazily, so a rule enabled after it was
            # added still contributes instead of silently never matching.
            cnt = sum(len(pat.findall(t)) for pat in self._compile_rule(r))
            if cnt:
                hits[r.rid] = cnt
                total_weighted_hits += cnt * r.weight
        # Normalise by sqrt(length) so long texts do not game the score; the
        # +1 keeps the denominator non-zero for empty text.
        normalized_score = total_weighted_hits / (len(t) ** 0.5 + 1.0)
        return float(normalized_score), hits, active_count

    def explain(self, text: str) -> List[Dict[str, object]]:
        """Produce a human-readable diagnosis of which rules fired.

        Returns one dict per enabled rule with at least one match, holding
        ``rid``, ``name``, ``category``, ``weight``, the total ``count`` and
        up to ten matched substrings under ``matches``.
        """
        t = str(text)
        out: List[Dict[str, object]] = []
        for r in self.rules:
            if not r.enabled:
                continue
            matches: List[str] = []
            for pat in self._compile_rule(r):
                # group(0) is the whole matched text. findall() would return
                # only capture-group contents (or tuples of them) for the
                # patterns that contain groups.
                matches.extend(m.group(0) for m in pat.finditer(t))
            if matches:
                out.append(
                    {
                        "rid": r.rid,
                        "name": r.name,
                        "category": r.category,
                        "matches": matches[:10],
                        "count": len(matches),
                        "weight": r.weight,
                    }
                )
        return out
# Global default engine – imported by model_utils and experiment scripts.
# Built once at import time from DEFAULT_RULES (its enabled rules are compiled
# by the RuleEngine constructor); rule_score() and rule_explain() fall back to
# it when no engine is passed.
_DEFAULT_ENGINE = RuleEngine(DEFAULT_RULES)
def rule_score(text: str, engine: RuleEngine | None = None) -> Tuple[float, Dict[str, int], int]:
    """Backward-compatible entry point: score *text* with *engine*.

    Args:
        text: Text to evaluate.
        engine: Engine to use; the module-level default engine when ``None``.

    Returns:
        The result of ``engine.score(text)``, i.e.
        ``(normalised_score, hit_map, active_rule_count)``.
    """
    # Test for None explicitly: with ``engine or default`` a valid engine that
    # happens to be falsy (e.g. one defining __len__ and holding no rules)
    # would be silently swapped for the default engine.
    eng = engine if engine is not None else _DEFAULT_ENGINE
    return eng.score(text)
def rule_explain(text: str, engine: RuleEngine | None = None) -> List[Dict[str, object]]:
    """Diagnostic entry point: list the rules that fired on *text*.

    Args:
        text: Text to evaluate.
        engine: Engine to use; the module-level default engine when ``None``.

    Returns:
        The result of ``engine.explain(text)``.
    """
    # Test for None explicitly so that a falsy-but-valid engine is not
    # silently replaced by the default engine.
    eng = engine if engine is not None else _DEFAULT_ENGINE
    return eng.explain(text)