Text Classification
Transformers
Safetensors
Chinese
chinese
ai-text-detection
ensemble
bert
roberta
qwen
lora
research
dataset
Instructions to use LUCIFerace/enhanced-replica-model-pack with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use LUCIFerace/enhanced-replica-model-pack with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="LUCIFerace/enhanced-replica-model-pack")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("LUCIFerace/enhanced-replica-model-pack", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """Pattern-based rule engine for AI-text detection. | |
| Design principles: | |
| 1. No human-keywords (easily spoofed and noisy). | |
| 2. AI indicators are structural / rhetorical patterns, not single words. | |
| 3. Rules are registerable at runtime to stay ahead of adversarial paraphrasing. | |
| 4. Scoring is length-normalised so long texts do not artificially inflate hits. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| from dataclasses import dataclass | |
| from typing import Dict, List, Tuple | |
@dataclass
class PatternRule:
    """A single detection rule comprised of one or more regex patterns.

    The ``@dataclass`` decorator generates the keyword constructor that the
    default rule bank relies on (``PatternRule(rid=..., name=..., ...)``).

    Attributes:
        rid: Identifier of the rule; the engine uses it as the key of the hit
            map and as the prefix of its compiled-pattern cache keys.
        name: Display name included in the engine's diagnostics.
        patterns: Regular-expression sources; matches of all of them are
            added together when the rule is evaluated.
        weight: Multiplier applied to the rule's match count when scoring.
        category: Label included in the engine's diagnostics.
        enabled: Rules with ``enabled=False`` are skipped by the engine.
    """

    rid: str
    name: str
    patterns: List[str]
    weight: float = 1.0
    category: str = "syntax"
    enabled: bool = True
| # --------------------------------------------------------------------------- | |
| # Default rule bank – these target *stylistic fingerprints* rather than | |
| # overt self-references, making them harder for an adversary to guess and | |
| # avoid with simple synonym replacement. | |
| # --------------------------------------------------------------------------- | |
# Punctuation handling: clause-delimiter character classes list both the ASCII
# mark and its fullwidth counterpart (U+FF0C ",", U+FF01 "!", U+FF1F "?",
# U+FF0E "."), because Chinese text normally uses the fullwidth forms. The
# fullwidth members are written as ``\uXXXX`` regex escapes so that they
# survive tools that fold fullwidth characters to ASCII.
# NOTE(review): the previous revision listed each ASCII mark twice in these
# classes; presumably the second copy was the fullwidth form lost to width
# normalisation -- confirm against the upstream rule file.
DEFAULT_RULES: List[PatternRule] = [
    PatternRule(
        rid="ai_struct_not_but",
        name="不是-而是结构",
        patterns=[r"不是[^,\uFF0C。.!\uFF01?\uFF1F]{0,30}而是"],
        weight=1.0,
        category="syntax",
    ),
    PatternRule(
        rid="ai_praise_grasp",
        name="赞扬精准抓住",
        patterns=[
            r"你\s*(非常|很|真的|确实)?\s*(精准|准确|精确)?\s*(地|的)?\s*抓住\s*了\s*这\s*(一)?\s*点",
            r"你\s*(非常|很|真的)?\s*(懂|理解|说到|指出)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_praise_textbook",
        name="教科书级别",
        patterns=[
            r"(教科书|标杆|模板|标准|典范|参考级)(级别|级|式|一样|般)的(理解|回答|分析|解读|阐述|表达|总结)",
            r"(堪称|可谓|可以算是|几乎就是)\s*(教科书|标杆|模板|标准)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_praise_correct",
        name="完全正确赞扬",
        patterns=[
            r"你说\s*(的)?\s*(完全|非常|十分|绝对|确实|太)?\s*正确",
            r"你\s*(说|讲|理解|看)\s*(得|的)?\s*(完全|非常|十分|绝对|确实|很|太)?\s*(对|到位|透彻)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_quote_abuse",
        name="双引号滥用",
        # NOTE(review): the character class repeats the same ASCII quote four
        # times; it may originally have listed typographic quotes as well.
        # Left unchanged because the intended members cannot be recovered
        # from this file -- confirm against upstream.
        patterns=[r"([\"\"\"\"]).{2,40}?\1.{0,10}?\1.{2,40}?\1"],
        weight=0.8,
        category="style",
    ),
    PatternRule(
        rid="ai_summary_transition",
        name="总结过渡词",
        patterns=[
            r"(总的来说|综上所述|一言以蔽之|简而言之|总的来看).{0,5}[,\uFF0C]",
            r"(首先|其次|再者|最后|一方面|另一方面).{0,5}[,\uFF0C]",
        ],
        weight=0.5,
        category="syntax",
    ),
    PatternRule(
        rid="ai_modal_weak",
        name="弱化确定性",
        patterns=[
            r"(可能|也许|某种程度上|一定程度上|大致|基本).{0,10}(可以|算是|认为|看作)",
            r"(不\s*(排除|否定|否认|确定|保证))",
        ],
        weight=0.4,
        category="style",
    ),
    PatternRule(
        rid="ai_self_reference",
        name="AI自我指涉",
        patterns=[
            r"(作为|身为)\s*(一个|一名)?\s*(AI|人工智能|语言模型|大模型|助手)",
            r"(我\s*(是|作为)\s*(一个|一名)?\s*(AI|人工智能|语言模型|大模型|助手))",
            r"(没有|不\s*(具备|拥有))[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(情感|主观|个人经历|身体|人类)",
        ],
        weight=1.2,
        category="identity",
    ),
    # -----------------------------------------------------------------------
    # Community-contributed rules based on human intuitions about AI text.
    # -----------------------------------------------------------------------
    PatternRule(
        rid="ai_struct_not_ext",
        name="不是-而是排比扩展",
        patterns=[
            r"不是[^,\uFF0C。.!\uFF01?\uFF1F]{0,40}(而是|是|他更)[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(更|还)"
        ],
        weight=1.0,
        category="syntax",
    ),
    PatternRule(
        rid="ai_meta_discourse",
        name="论述结构预告",
        patterns=[
            r"我分(几个|几)\s*个?(层面|维度)[来给]*你",
            r"一句话给[你]*[一个]*最[稳准狠]",
            r"从(几个|多)\s*个(维度|层面|角度)",
        ],
        weight=0.8,
        category="syntax",
    ),
    PatternRule(
        rid="ai_flattery_open",
        name="赞扬式开场",
        patterns=[
            r"你的(观察|洞察|直觉)\s*(非常|很|真的)?\s*(敏锐|精准|到位)",
            r"你[说讲看]*\s*(得|的)?\s*(非常|很|太)?\s*(对|到位|透彻|准确)",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_over_agree",
        name="过度认同+自我否定",
        patterns=[
            # Accept either comma form between the two clauses.
            r"你说的对[,\uFF0C]我(完全|确实)?\s*(搞错|误解|想错)",
            r"你看到了(事情|问题)?\s*的?本质",
        ],
        weight=1.0,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_service_push",
        name="服务性追问",
        patterns=[
            r"你现在感觉怎么样",
            r"要不要我帮[你]*",
        ],
        weight=0.8,
        category="rhetoric",
    ),
    PatternRule(
        rid="ai_poetic_cliche",
        name="诗意化比喻堆砌",
        patterns=[
            r"(时光|岁月|往事|记忆)\s*的?\s*(褶皱|琥珀|潮汐|甬道)",
            r"如[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(月光|星光)[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}(潮汐|涟漪)",
            r"被封存在[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}般?的?暮色",
        ],
        weight=1.0,
        category="style",
    ),
    PatternRule(
        rid="ai_list_numeric",
        name="数字分点结构",
        patterns=[
            r"1\.\s+.{3,40}[\n\r]+2\.\s+.{3,40}",
            r"一[、.\uFF0E]\s*.{3,40}[\n\r]+二[、.\uFF0E]\s*.{3,40}",
        ],
        weight=0.6,
        category="style",
    ),
    PatternRule(
        rid="ai_markdown_bold",
        name="Markdown粗体外泄",
        patterns=[r"\*\*[^*]+\*\*"],
        weight=0.5,
        category="style",
    ),
    PatternRule(
        rid="ai_honesty_preface",
        name="说实话前置",
        patterns=[r"[,\uFF0C]\s*说实(话|在的)[,\uFF0C]"],
        weight=0.4,
        category="style",
    ),
    PatternRule(
        rid="ai_perspective_close",
        name="视角收尾",
        patterns=[r"接近你?现在[^,\uFF0C。.!\uFF01?\uFF1F]{0,20}的?视角"],
        weight=0.4,
        category="style",
    ),
]
class RuleEngine:
    """Compile and evaluate a set of ``PatternRule`` objects.

    Compiled regexes are cached in ``self._compiled`` under the key
    ``"<rid>_<pattern index>"``.
    """

    def __init__(self, rules: List[PatternRule] | None = None):
        """Create an engine from *rules*; ``None`` gives an empty engine."""
        # Copy so that ``register`` never mutates the caller's list.
        self.rules: List[PatternRule] = list(rules) if rules is not None else []
        self._compiled: Dict[str, re.Pattern] = {}
        self._compile_all()

    def _compile_all(self) -> None:
        """Eagerly compile the patterns of every enabled rule."""
        for rule in self.rules:
            if rule.enabled:
                self._patterns_for(rule)

    def _patterns_for(self, rule: PatternRule) -> List[re.Pattern]:
        """Return the compiled patterns of *rule*, compiling any that are missing.

        Compiling on demand means a rule whose ``enabled`` flag is switched on
        after construction or registration is still evaluated. A cached entry
        is rebuilt when its source no longer equals the rule's pattern string.
        """
        compiled: List[re.Pattern] = []
        for idx, source in enumerate(rule.patterns):
            key = f"{rule.rid}_{idx}"
            pattern = self._compiled.get(key)
            if pattern is None or pattern.pattern != source:
                pattern = re.compile(source)
                self._compiled[key] = pattern
            compiled.append(pattern)
        return compiled

    def _forget(self, rule: PatternRule) -> None:
        """Drop the cached compiled patterns that belong to *rule*."""
        for idx in range(len(rule.patterns)):
            self._compiled.pop(f"{rule.rid}_{idx}", None)

    def _iter_matches(self, rule: PatternRule, text: str):
        """Yield every match object of every pattern of *rule* in *text*."""
        for pattern in self._patterns_for(rule):
            yield from pattern.finditer(text)

    def register(self, rule: PatternRule) -> None:
        """Add a new rule at runtime (useful for rapid adversarial response).

        If a rule with the same ``rid`` is already registered it is replaced
        in place. Appending a second rule under the same ``rid`` would make
        both share one set of cache keys and count every hit twice.
        """
        for pos, existing in enumerate(self.rules):
            if existing.rid == rule.rid:
                self._forget(existing)
                self.rules[pos] = rule
                break
        else:
            self.rules.append(rule)
        if rule.enabled:
            self._patterns_for(rule)

    def score(self, text: str) -> Tuple[float, Dict[str, int], int]:
        """Return ``(normalised_score, hit_map, active_rule_count)``.

        ``hit_map`` maps the ``rid`` of each enabled rule that matched to its
        total match count. The score is the weight-scaled sum of those counts
        divided by ``sqrt(len(text)) + 1``; the ``+ 1`` keeps the division
        defined for empty text.
        """
        t = str(text)
        hits: Dict[str, int] = {}
        total_weighted_hits = 0.0
        active_count = 0
        for rule in self.rules:
            if not rule.enabled:
                continue
            active_count += 1
            cnt = sum(1 for _ in self._iter_matches(rule, t))
            if cnt:
                hits[rule.rid] = cnt
                total_weighted_hits += cnt * rule.weight
        # Normalise by sqrt(length) so long texts do not game the score.
        normalized_score = total_weighted_hits / (len(t) ** 0.5 + 1.0)
        return float(normalized_score), hits, active_count

    def explain(self, text: str) -> List[Dict[str, object]]:
        """Produce a human-readable diagnosis of which rules fired.

        Returns one dict per enabled rule that matched, with the keys
        ``rid``, ``name``, ``category``, ``matches`` (at most the first ten
        matched substrings), ``count`` (total number of matches) and
        ``weight``.
        """
        t = str(text)
        out: List[Dict[str, object]] = []
        for rule in self.rules:
            if not rule.enabled:
                continue
            # group(0) is the whole matched text; findall() would return only
            # the capture groups for patterns that contain them.
            matches = [m.group(0) for m in self._iter_matches(rule, t)]
            if matches:
                out.append(
                    {
                        "rid": rule.rid,
                        "name": rule.name,
                        "category": rule.category,
                        "matches": matches[:10],
                        "count": len(matches),
                        "weight": rule.weight,
                    }
                )
        return out
# Shared module-level engine preloaded with DEFAULT_RULES – imported by
# model_utils and experiment scripts.
_DEFAULT_ENGINE: RuleEngine = RuleEngine(rules=DEFAULT_RULES)
def rule_score(text: str, engine: RuleEngine | None = None) -> Tuple[float, Dict[str, int], int]:
    """Backward-compatible entry point: score *text* with a rule engine.

    Args:
        text: Text to evaluate.
        engine: Engine to use; the module-level default engine is used when
            this is ``None``.

    Returns:
        Whatever ``engine.score(text)`` returns.
    """
    # Test for None explicitly: ``engine or ...`` would silently discard a
    # caller-supplied engine that happens to be falsy.
    eng = _DEFAULT_ENGINE if engine is None else engine
    return eng.score(text)
def rule_explain(text: str, engine: RuleEngine | None = None) -> List[Dict[str, object]]:
    """Diagnostic entry point: list the rules that fire on *text*.

    Args:
        text: Text to evaluate.
        engine: Engine to use; the module-level default engine is used when
            this is ``None``.

    Returns:
        Whatever ``engine.explain(text)`` returns.
    """
    # Test for None explicitly: ``engine or ...`` would silently discard a
    # caller-supplied engine that happens to be falsy.
    eng = _DEFAULT_ENGINE if engine is None else engine
    return eng.explain(text)