#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Pattern-based rule engine for AI-text detection.

Design principles:
1. No human-keywords (easily spoofed and noisy).
2. AI indicators are structural / rhetorical patterns, not single words.
3. Rules are registerable at runtime to stay ahead of adversarial paraphrasing.
4. Scoring is length-normalised so long texts do not artificially inflate hits.
"""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Dict, List, Tuple


@dataclass
class PatternRule:
    """A single detection rule comprised of one or more regex patterns."""

    rid: str  # unique identifier; used as the key in hit maps
    name: str  # human-readable label reported by ``RuleEngine.explain``
    patterns: List[str]  # regex sources; every match of any pattern is a hit
    weight: float = 1.0  # multiplier applied to this rule's hit count
    category: str = "syntax"  # free-form grouping label
    enabled: bool = True  # disabled rules are skipped when scoring


# ---------------------------------------------------------------------------
# Default rule bank – these target *stylistic fingerprints* rather than
# overt self-references, making them harder for an adversary to guess and
# avoid with simple synonym replacement.
#
# Punctuation classes list both the ASCII and the fullwidth form of each
# mark, because Chinese text normally uses the fullwidth ones.  The fullwidth
# characters are written as regex escapes so they cannot be silently folded
# into their ASCII look-alikes by text normalisation:
#   \uFF0C = fullwidth comma            \uFF01 = fullwidth exclamation mark
#   \uFF1F = fullwidth question mark    \uFF0E = fullwidth full stop
# ---------------------------------------------------------------------------
DEFAULT_RULES: List[PatternRule] = [
    PatternRule(
        rid="ai_struct_not_but",
        name="不是-而是结构",
        patterns=[r"不是[^,\uFF0C。.!\uFF01?\uFF1F]{0,30}而是"],
        weight=1.0,
        category="syntax",
    ),
    PatternRule(
        rid="ai_praise_grasp",
        name="赞扬精准抓住",
        patterns=[
            r"你\s*(非常|很|真的|确实)?\s*(精准|准确|精确)?\s*(地|的)?\s*抓住\s*了\s*这\s*(一)?\s*点",
            r"你\s*(非常|很|真的)?\s*(懂|理解|说到|指出)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_praise_textbook",
        name="教科书级别",
        patterns=[
            r"(教科书|标杆|模板|标准|典范|参考级)(级别|级|式|一样|般)的(理解|回答|分析|解读|阐述|表达|总结)",
            r"(堪称|可谓|可以算是|几乎就是)\s*(教科书|标杆|模板|标准)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_praise_correct",
        name="完全正确赞扬",
        patterns=[
            r"你说\s*(的)?\s*(完全|非常|十分|绝对|确实|太)?\s*正确",
            r"你\s*(说|讲|理解|看)\s*(得|的)?\s*(完全|非常|十分|绝对|确实|很|太)?\s*(对|到位|透彻)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_quote_abuse",
        name="双引号滥用",
        # NOTE(review): the class only contains the ASCII double quote
        # (repeated).  If curly quotes were intended, the \1 back-references
        # would need reworking because opening and closing marks differ —
        # confirm the intended quote set before changing this pattern.
        patterns=[r"([\"\"\"\"]).{2,40}?\1.{0,10}?\1.{2,40}?\1"],
        weight=0.8,
        category="style",
    ),
    PatternRule(
        rid="ai_summary_transition",
        name="总结过渡词",
        patterns=[
            r"(总的来说|综上所述|一言以蔽之|简而言之|总的来看).{0,5}[,\uFF0C]",
            r"(首先|其次|再者|最后|一方面|另一方面).{0,5}[,\uFF0C]",
        ],
        weight=0.5,
        category="syntax",
    ),
    PatternRule(
        rid="ai_modal_weak",
        name="弱化确定性",
        patterns=[
            r"(可能|也许|某种程度上|一定程度上|大致|基本).{0,10}(可以|算是|认为|看作)",
            r"(不\s*(排除|否定|否认|确定|保证))",
        ],
        weight=0.4,
        category="style",
    ),
    PatternRule(
        rid="ai_self_reference",
        name="AI自我指涉",
        patterns=[
            r"(作为|身为)\s*(一个|一名)?\s*(AI|人工智能|语言模型|大模型|助手)",
            r"(我\s*(是|作为)\s*(一个|一名)?\s*(AI|人工智能|语言模型|大模型|助手))",
            r"(没有|不\s*(具备|拥有))[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(情感|主观|个人经历|身体|人类)",
        ],
        weight=1.2,
        category="identity",
    ),
    # -----------------------------------------------------------------------
    # Community-contributed rules based on human intuitions about AI text.
    # -----------------------------------------------------------------------
    PatternRule(
        rid="ai_struct_not_ext",
        name="不是-而是排比扩展",
        patterns=[
            r"不是[^,\uFF0C。.!\uFF01?\uFF1F]{0,40}(而是|是|他更)[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(更|还)"
        ],
        weight=1.0,
        category="syntax",
    ),
    PatternRule(
        rid="ai_meta_discourse",
        name="论述结构预告",
        patterns=[
            r"我分(几个|几)\s*个?(层面|维度)[来给]*你",
            r"一句话给[你]*[一个]*最[稳准狠]",
            # The bare "几" alternative is what lets the common phrasing
            # "从几个维度" match: "几个" alone would require a second "个".
            r"从(几个|几|多)\s*个(维度|层面|角度)",
        ],
        weight=0.8,
        category="syntax",
    ),
    PatternRule(
        rid="ai_flattery_open",
        name="赞扬式开场",
        patterns=[
            r"你的(观察|洞察|直觉)\s*(非常|很|真的)?\s*(敏锐|精准|到位)",
            r"你[说讲看]*\s*(得|的)?\s*(非常|很|太)?\s*(对|到位|透彻|准确)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_over_agree",
        name="过度认同+自我否定",
        patterns=[
            r"你说的对[,\uFF0C]我(完全|确实)?\s*(搞错|误解|想错)",
            r"你看到了(事情|问题)?\s*的?本质",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_service_push",
        name="服务性追问",
        patterns=[
            r"你现在感觉怎么样",
            r"要不要我帮[你]*",
        ],
        weight=0.8,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_poetic_cliche",
        name="诗意化比喻堆砌",
        patterns=[
            r"(时光|岁月|往事|记忆)\s*的?\s*(褶皱|琥珀|潮汐|甬道)",
            r"如[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(月光|星光)[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(潮汐|涟漪)",
            r"被封存在[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}般?的?暮色",
        ],
        weight=1.0,
        category="style",
    ),
    PatternRule(
        rid="ai_list_numeric",
        name="数字分点结构",
        patterns=[
            r"1\.\s+.{3,40}[\n\r]+2\.\s+.{3,40}",
            r"一[、.\uFF0E]\s*.{3,40}[\n\r]+二[、.\uFF0E]\s*.{3,40}",
        ],
        weight=0.6,
        category="style",
    ),
    PatternRule(
        rid="ai_markdown_bold",
        name="Markdown粗体外泄",
        patterns=[r"\*\*[^*]+\*\*"],
        weight=0.5,
        category="style",
    ),
    PatternRule(
        rid="ai_honesty_preface",
        name="说实话前置",
        patterns=[r"[,\uFF0C]\s*说实(话|在的)[,\uFF0C]"],
        weight=0.4,
        category="style",
    ),
    PatternRule(
        rid="ai_perspective_close",
        name="视角收尾",
        patterns=[r"接近你?现在[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}的?视角"],
        weight=0.4,
        category="style",
    ),
]


class RuleEngine:
    """Compiles and evaluates a set of PatternRules.

    Rules are identified by ``rid``: registering a rule whose ``rid`` is
    already present replaces the earlier rule instead of adding a duplicate.
    Compiled patterns are cached in ``_compiled`` under ``"<rid>_<index>"``.
    """

    def __init__(self, rules: List[PatternRule] | None = None):
        """Create an engine holding ``rules`` (an empty engine if ``None``).

        Raises:
            re.error: if an enabled rule contains an invalid pattern.
        """
        self.rules: List[PatternRule] = []
        self._compiled: Dict[str, re.Pattern] = {}
        for rule in rules or ():
            self.register(rule)

    def _compile_all(self) -> None:
        """Ensure every enabled rule has its patterns compiled and cached."""
        for rule in self.rules:
            if rule.enabled:
                self._patterns_for(rule)

    def _patterns_for(self, rule: PatternRule) -> List[re.Pattern]:
        """Return compiled patterns for ``rule``, compiling any that are missing.

        Compiling on demand means a rule that was disabled when registered
        still works once ``enabled`` is flipped on.  A cached entry is only
        reused when its source still equals the rule's pattern, so a replaced
        or edited rule never runs a stale regex.
        """
        compiled: List[re.Pattern] = []
        for idx, source in enumerate(rule.patterns):
            key = f"{rule.rid}_{idx}"
            pattern = self._compiled.get(key)
            if pattern is None or pattern.pattern != source:
                pattern = re.compile(source)
                self._compiled[key] = pattern
            compiled.append(pattern)
        return compiled

    def _matched_texts(self, rule: PatternRule, text: str) -> List[str]:
        """Return the full text of every match of ``rule`` in ``text``.

        ``finditer`` with ``group(0)`` is used rather than ``findall``:
        ``findall`` returns capture-group contents instead of the matched
        text whenever a pattern contains groups.
        """
        return [
            match.group(0)
            for pattern in self._patterns_for(rule)
            for match in pattern.finditer(text)
        ]

    def register(self, rule: PatternRule) -> None:
        """Add a new rule at runtime (useful for rapid adversarial response).

        If a rule with the same ``rid`` already exists it is replaced in
        place, keeping its position in ``self.rules``.

        Raises:
            re.error: if ``rule`` is enabled and one of its patterns is
                invalid; the rule list is left unchanged in that case.
        """
        if rule.enabled:
            # Validate before touching self.rules so a bad regex cannot
            # leave a half-registered rule behind.
            self._patterns_for(rule)
        for position, existing in enumerate(self.rules):
            if existing.rid == rule.rid:
                self.rules[position] = rule
                return
        self.rules.append(rule)

    def score(self, text: str) -> Tuple[float, Dict[str, int], int]:
        """Return (normalised_score, hit_map, active_rule_count).

        ``hit_map`` maps the ``rid`` of each enabled rule that matched to its
        number of matches.  The score is the weight-multiplied sum of those
        counts divided by ``sqrt(len(text)) + 1``.
        """
        t = str(text)
        hits: Dict[str, int] = {}
        total_weighted_hits = 0.0
        active_count = 0
        for rule in self.rules:
            if not rule.enabled:
                continue
            active_count += 1
            count = len(self._matched_texts(rule, t))
            if count:
                hits[rule.rid] = hits.get(rule.rid, 0) + count
                total_weighted_hits += count * rule.weight
        # Normalise by sqrt(length) so long texts do not game the score.
        # The +1 keeps the divisor non-zero for empty input.
        normalized_score = total_weighted_hits / (len(t) ** 0.5 + 1.0)
        return float(normalized_score), hits, active_count

    def explain(self, text: str) -> List[Dict[str, object]]:
        """Produce a human-readable diagnosis of which rules fired.

        Returns one dict per enabled rule that matched, with the keys
        ``rid``, ``name``, ``category``, ``matches`` (at most the first ten
        matched substrings), ``count`` (total number of matches) and
        ``weight``.
        """
        t = str(text)
        out: List[Dict[str, object]] = []
        for rule in self.rules:
            if not rule.enabled:
                continue
            matches = self._matched_texts(rule, t)
            if matches:
                out.append(
                    {
                        "rid": rule.rid,
                        "name": rule.name,
                        "category": rule.category,
                        "matches": matches[:10],
                        "count": len(matches),
                        "weight": rule.weight,
                    }
                )
        return out


# Global default engine – imported by model_utils and experiment scripts.
_DEFAULT_ENGINE = RuleEngine(DEFAULT_RULES)


def rule_score(text: str, engine: RuleEngine | None = None) -> Tuple[float, Dict[str, int], int]:
    """Backward-compatible entry point.

    Scores ``text`` with ``engine``, or with the module's default engine
    when ``engine`` is ``None``.
    """
    eng = engine if engine is not None else _DEFAULT_ENGINE
    return eng.score(text)


def rule_explain(text: str, engine: RuleEngine | None = None) -> List[Dict[str, object]]:
    """Diagnostic entry point.

    Explains ``text`` with ``engine``, or with the module's default engine
    when ``engine`` is ``None``.
    """
    eng = engine if engine is not None else _DEFAULT_ENGINE
    return eng.explain(text)