evasion-detection-artifacts / src /eval_statistical.py
simonlesaumon's picture
Upload src/eval_statistical.py with huggingface_hub
7522ee5 verified
Raw
History Blame Contribute Delete
13.5 kB
"""
Lightweight statistical evaluation — no torch/transformers needed.
Computes the exact text dispersion metrics that AI detectors exploit:
- Vocabulary richness (TTR, Hapax ratio)
- Sentence length variance (burstiness)
- Word frequency dispersion
- Token repetition patterns
- Readability scores
These are the features detectors like GPTZero (perplexity + burstiness),
Fast-DetectGPT (curvature), and Binoculars (cross-perplexity) exploit.
"""
from __future__ import annotations
import json
import math
import os
import re
import sys
from collections import Counter
from dataclasses import dataclass, field
@dataclass
class TextStats:
    """Container for the per-text metrics filled in by ``compute_stats``.

    Every field defaults to zero, so a freshly constructed instance doubles
    as the profile of a text in which nothing could be measured.
    """

    # --- size ---
    num_words: int = 0
    num_sentences: int = 0
    num_unique_words: int = 0

    # --- vocabulary richness ---
    type_token_ratio: float = 0.0  # unique words / total words
    hapax_ratio: float = 0.0  # share of unique words that occur exactly once

    # --- word length ---
    avg_word_len: float = 0.0
    std_word_len: float = 0.0

    # --- sentence length (burstiness) ---
    avg_sentence_len: float = 0.0  # mean words per sentence
    std_sentence_len: float = 0.0  # spread of sentence lengths, used as a burstiness proxy
    sentence_len_cv: float = 0.0  # coefficient of variation: std / mean

    # --- word frequency dispersion ---
    avg_word_freq: float = 0.0  # mean occurrence count per unique word
    std_word_freq: float = 0.0  # spread of those occurrence counts

    # --- readability ---
    readability_flesch: float = 0.0  # Flesch Reading Ease score
def tokenize_sentences(text: str) -> list[str]:
    """Split ``text`` into sentences on runs of ``.``, ``!`` or ``?``.

    Each fragment is whitespace-trimmed; fragments with fewer than three
    whitespace-separated tokens (including empty ones) are discarded.
    """
    kept = []
    for fragment in re.split(r'[.!?]+', text):
        candidate = fragment.strip()
        # An empty candidate splits into zero tokens, so the length test
        # alone would reject it; the explicit check keeps the intent obvious.
        if candidate and len(candidate.split()) >= 3:
            kept.append(candidate)
    return kept
def tokenize_words(text: str) -> list[str]:
    """Split ``text`` on whitespace and normalise each token.

    Tokens are lowercased and have leading/trailing punctuation removed.
    Tokens that consist of punctuation only are dropped. Punctuation inside
    a token (e.g. the apostrophe in ``it's``) is left untouched.
    """
    edge_punctuation = '.,;:!?()[]{}"\'-'
    normalised = []
    for token in text.split():
        if not token.strip(edge_punctuation):
            continue  # nothing left once the punctuation is removed
        normalised.append(token.lower().strip(edge_punctuation))
    return normalised
def compute_stats(text: str) -> TextStats:
    """Build the statistical profile of ``text``.

    Returns an all-default ``TextStats`` when ``tokenize_words`` finds no
    words. Sentence-level fields stay at their defaults when
    ``tokenize_sentences`` yields nothing. All standard deviations are
    population standard deviations (divide by N).
    """
    profile = TextStats()
    tokens = tokenize_words(text)
    sentence_list = tokenize_sentences(text)
    if not tokens:
        return profile

    def spread(values, mean):
        # Population standard deviation of ``values`` around a known mean.
        squared = sum((v - mean) ** 2 for v in values)
        return float(math.sqrt(squared / max(len(values), 1)))

    # --- vocabulary ---
    occurrences = Counter(tokens)
    profile.num_words = len(tokens)
    profile.num_unique_words = len(occurrences)
    profile.type_token_ratio = profile.num_unique_words / profile.num_words
    once_only = list(occurrences.values()).count(1)
    profile.hapax_ratio = once_only / profile.num_unique_words

    # --- word length ---
    lengths = [len(token) for token in tokens]
    profile.avg_word_len = sum(lengths) / len(lengths)
    profile.std_word_len = spread(lengths, profile.avg_word_len)

    # --- word frequency dispersion ---
    counts = list(occurrences.values())
    profile.avg_word_freq = sum(counts) / len(counts)
    profile.std_word_freq = spread(counts, profile.avg_word_freq)

    # --- sentence length (burstiness) ---
    profile.num_sentences = len(sentence_list)
    words_per_sentence = [len(tokenize_words(sentence)) for sentence in sentence_list]
    if words_per_sentence:
        profile.avg_sentence_len = sum(words_per_sentence) / len(words_per_sentence)
        profile.std_sentence_len = spread(words_per_sentence, profile.avg_sentence_len)
        # The 0.01 floor guards against a zero mean (sentences whose tokens
        # are all punctuation contribute a length of 0).
        profile.sentence_len_cv = profile.std_sentence_len / max(profile.avg_sentence_len, 0.01)

    # --- Flesch Reading Ease ---
    # Note: the word count covers the whole text, while the sentence count
    # only includes sentences kept by tokenize_sentences.
    if profile.num_sentences > 0:
        syllable_total = sum(count_syllables(token) for token in tokens)
        sentence_term = profile.num_words / profile.num_sentences
        syllable_term = syllable_total / profile.num_words
        profile.readability_flesch = 206.835 - 1.015 * sentence_term - 84.6 * syllable_term
    return profile
def count_syllables(word: str) -> int:
    """Estimate the number of syllables in ``word``.

    Heuristic: words of up to three characters count as one syllable;
    otherwise every maximal run of the letters ``aeiouy`` counts as one
    syllable, with one subtracted when the word ends in ``e``. The result
    is never below 1.
    """
    lowered = word.lower()
    if len(lowered) <= 3:
        return 1
    # One syllable per group of consecutive vowels.
    syllables = len(re.findall(r"[aeiouy]+", lowered))
    if lowered.endswith("e"):
        # Treat a trailing "e" as silent.
        syllables = max(1, syllables - 1)
    return max(1, syllables)
def compute_dispersion_score(stats: TextStats) -> dict:
    """Map dispersion metrics onto a 0-1 'human-likeness' scale.

    Each metric is placed linearly between a hard-coded "AI-typical" and
    "human-typical" reference value, clamped to [0, 1] (0 = AI-like,
    1 = human-like) and rounded to three decimals. The working assumption
    of this heuristic is that AI text shows lower vocabulary diversity,
    more uniform sentence lengths and lower word-frequency dispersion than
    human text.

    Returns:
        A dict with the keys ``ttr``, ``hapax``, ``sent_cv`` and
        ``word_freq_std`` plus ``overall_human_likeness``, the rounded mean
        of those four scores.
    """
    # (score key, TextStats attribute, AI-typical value, human-typical value)
    # The reference points are the original author's heuristic choices.
    reference_points = (
        ("ttr", "type_token_ratio", 0.35, 0.55),
        ("hapax", "hapax_ratio", 0.40, 0.55),
        ("sent_cv", "sentence_len_cv", 0.40, 0.75),
        ("word_freq_std", "std_word_freq", 1.5, 3.0),
    )

    result = {}
    for key, attribute, ai_ref, human_ref in reference_points:
        # The floor on the span avoids dividing by zero if two reference
        # points ever coincide.
        span = max(human_ref - ai_ref, 0.001)
        position = (getattr(stats, attribute) - ai_ref) / span
        result[key] = round(max(0.0, min(1.0, position)), 3)

    # Computed before the key is inserted, so it averages the four metrics only.
    result["overall_human_likeness"] = round(sum(result.values()) / len(result), 3)
    return result
def evaluate_copa_results(input_path: str, output_path: str) -> None:
    """Run the statistical evaluation on a CoPA results file.

    Reads ``input_path`` (a JSON object with a ``"results"`` list whose
    entries hold ``"original"`` and ``"rewritten"`` texts), profiles both
    texts of every entry, averages the metrics over all samples, derives a
    heuristic evasion rating from the change in human-likeness, writes the
    report as JSON to ``output_path`` and prints a short summary.

    If there are no results, a message is printed and nothing is written.

    Args:
        input_path: Path of the CoPA results JSON file.
        output_path: Path of the JSON report to write. Its parent directory
            is created when missing; a bare file name is written to the
            current working directory.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If a result entry lacks ``"original"`` or ``"rewritten"``.
    """
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    results = data.get("results", [])
    if not results:
        print("[Eval] No results to evaluate.")
        return
    print(f"[Eval] Analyzing {len(results)} samples...")

    orig_stats_list = []
    rewritten_stats_list = []
    dispersion_orig = []
    dispersion_rewritten = []
    for r in results:
        orig_s = compute_stats(r["original"])
        rewritten_s = compute_stats(r["rewritten"])
        orig_stats_list.append(orig_s)
        rewritten_stats_list.append(rewritten_s)
        dispersion_orig.append(compute_dispersion_score(orig_s))
        dispersion_rewritten.append(compute_dispersion_score(rewritten_s))

    # (report key, TextStats attribute, rounding digits); the order here is
    # the key order of the emitted JSON.
    stat_fields = (
        ("avg_words", "num_words", 1),
        ("avg_sentences", "num_sentences", 1),
        ("avg_sentence_len", "avg_sentence_len", 1),
        ("sentence_len_cv", "sentence_len_cv", 3),
        ("type_token_ratio", "type_token_ratio", 3),
        ("hapax_ratio", "hapax_ratio", 3),
        ("avg_word_len", "avg_word_len", 1),
        ("std_word_len", "std_word_len", 2),
        ("avg_word_freq", "avg_word_freq", 1),
        ("std_word_freq", "std_word_freq", 2),
        ("readability_flesch", "readability_flesch", 1),
    )
    dispersion_keys = ("ttr", "hapax", "sent_cv", "word_freq_std", "overall_human_likeness")

    def avg_stats(stats_list, attr):
        # 0 is the TextStats default, i.e. "metric not computed", so those
        # samples are skipped. Negative values are kept: Flesch Reading Ease
        # is legitimately negative for very dense text, and dropping it
        # would bias the mean upward.
        vals = [getattr(s, attr) for s in stats_list if getattr(s, attr) != 0]
        return sum(vals) / max(len(vals), 1)

    def avg_disp(disp_list, key):
        vals = [d[key] for d in disp_list]
        return sum(vals) / max(len(vals), 1)

    def summarize_stats(stats_list):
        # Averaged, rounded text statistics for one side (original/rewritten).
        return {
            key: round(avg_stats(stats_list, attr), digits)
            for key, attr, digits in stat_fields
        }

    def summarize_dispersion(disp_list):
        # Averaged, rounded dispersion scores for one side.
        return {key: round(avg_disp(disp_list, key), 3) for key in dispersion_keys}

    report = {
        "eval_type": "statistical_analysis",
        "model": data.get("model", data.get("config", {}).get("model", "unknown")),
        "num_samples": len(results),
        "status": data.get("status", "unknown"),
        "elapsed_seconds": data.get("elapsed_seconds", 0),
        "tokens_per_second": data.get("tokens_per_second", 0),
        "text_statistics": {
            "original": summarize_stats(orig_stats_list),
            "rewritten": summarize_stats(rewritten_stats_list),
        },
        "dispersion_analysis": {
            "original": summarize_dispersion(dispersion_orig),
            "rewritten": summarize_dispersion(dispersion_rewritten),
        },
        "key_findings": [],
    }

    # Generate key findings. The TTR and sentence-CV lines report the
    # normalised 0-1 dispersion scores, not the raw statistics.
    disp_before = report["dispersion_analysis"]["original"]
    disp_after = report["dispersion_analysis"]["rewritten"]
    text_before = report["text_statistics"]["original"]
    text_after = report["text_statistics"]["rewritten"]

    orig_hl = disp_before["overall_human_likeness"]
    rew_hl = disp_after["overall_human_likeness"]
    delta_hl = rew_hl - orig_hl

    ttr_trend = "increased" if disp_after["ttr"] > disp_before["ttr"] else "decreased"
    cv_trend = "more" if disp_after["sent_cv"] > disp_before["sent_cv"] else "less"
    read_trend = (
        "easier"
        if text_after["readability_flesch"] > text_before["readability_flesch"]
        else "harder"
    )
    freq_trend = (
        "higher" if text_after["std_word_freq"] > text_before["std_word_freq"] else "lower"
    )

    findings = [
        f"Human-likeness: {orig_hl:.3f} -> {rew_hl:.3f} (delta={delta_hl:+.3f})",
        f"TTR: {disp_before['ttr']:.3f} -> {disp_after['ttr']:.3f} "
        f"({ttr_trend} vocabulary diversity)",
        f"Sentence CV: {disp_before['sent_cv']:.3f} -> {disp_after['sent_cv']:.3f} "
        f"({cv_trend} bursty sentence structure)",
        f"Readability: {text_before['readability_flesch']:.0f} -> "
        f"{text_after['readability_flesch']:.0f} Flesch "
        f"({read_trend} to read)",
        f"Word freq dispersion: {text_before['std_word_freq']:.2f} -> "
        f"{text_after['std_word_freq']:.2f} "
        f"({freq_trend} token dispersion)",
    ]
    report["key_findings"] = findings

    # Detector evasion potential (heuristic thresholds on the delta).
    evasion_potential = "LOW"
    if delta_hl > 0.15:
        evasion_potential = "HIGH"
    elif delta_hl > 0.05:
        evasion_potential = "MEDIUM"
    report["evasion_potential"] = {
        "rating": evasion_potential,
        "human_likeness_delta": round(delta_hl, 3),
        "note": "Statistical heuristic only. Real detector evaluation (Fast-DetectGPT, Binoculars, Pangram) requires Modal GPU — see next phase.",
    }

    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when one is named.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, ensure_ascii=False)

    print(f"[Eval] Report saved to {output_path}")
    print(f"[Eval] Human-likeness: {orig_hl:.3f} -> {rew_hl:.3f} (delta={delta_hl:+.3f})")
    print(f"[Eval] Evasion potential: {evasion_potential}")
    for f_ in findings:
        print(f"  - {f_}")
if __name__ == "__main__":
    # Usage: eval_statistical.py [INPUT_JSON [OUTPUT_JSON]]
    # Missing arguments fall back to the default paths under output/.
    cli_args = sys.argv[1:]
    input_file = cli_args[0] if cli_args else "output/copa_modal_results.json"
    output_file = cli_args[1] if len(cli_args) > 1 else "output/eval_statistical_report.json"
    evaluate_copa_results(input_file, output_file)