evasion-detection-artifacts / src /eval_statistical.py

Upload src/eval_statistical.py with huggingface_hub

7522ee5 verified 1 day ago

13.5 kB

	"""
	Lightweight statistical evaluation — no torch/transformers needed.
	Computes surface-level text dispersion metrics that approximate the signals AI detectors exploit:
	- Vocabulary richness (TTR, Hapax ratio)
	- Sentence length variance (burstiness)
	- Word frequency dispersion
	- Token repetition patterns
	- Readability scores

	These are model-free proxies for the signals used by detectors like GPTZero
	(perplexity + burstiness), Fast-DetectGPT (curvature), and Binoculars
	(cross-perplexity); the detectors themselves score text with a language model.
	"""

	from __future__ import annotations

	import json
	import math
	import os
	import re
	import sys
	from collections import Counter
	from dataclasses import dataclass, field


	@dataclass
	class TextStats:
	    """Per-text statistical profile; every field defaults to zero.

	    Field groups:
	        Counts      -- num_words, num_sentences, num_unique_words
	        Vocabulary  -- type_token_ratio (vocabulary richness),
	                       hapax_ratio (share of words that occur exactly once)
	        Word length -- avg_word_len, std_word_len
	        Sentences   -- avg_sentence_len (mean), std_sentence_len (burstiness
	                       proxy), sentence_len_cv (coefficient of variation)
	        Frequencies -- avg_word_freq (mean occurrences per distinct word),
	                       std_word_freq (dispersion of those occurrences)
	        Readability -- readability_flesch (Flesch Reading Ease)
	    """

	    # --- counts ---
	    num_words: int = 0
	    num_sentences: int = 0
	    num_unique_words: int = 0

	    # --- vocabulary richness ---
	    type_token_ratio: float = 0.0
	    hapax_ratio: float = 0.0

	    # --- word length ---
	    avg_word_len: float = 0.0
	    std_word_len: float = 0.0

	    # --- sentence length / burstiness ---
	    avg_sentence_len: float = 0.0
	    std_sentence_len: float = 0.0
	    sentence_len_cv: float = 0.0

	    # --- word frequency dispersion ---
	    avg_word_freq: float = 0.0
	    std_word_freq: float = 0.0

	    # --- readability ---
	    readability_flesch: float = 0.0


	def tokenize_sentences(text: str) -> list[str]:
	    """Split *text* into rough sentences on runs of ``.``, ``!`` or ``?``.

	    Each fragment is whitespace-trimmed; fragments with fewer than three
	    whitespace-separated words are discarded.
	    """
	    kept = []
	    for fragment in re.split(r'[.!?]+', text):
	        candidate = fragment.strip()
	        # A blank candidate splits into zero words, so the length check
	        # alone also rejects empty fragments.
	        if len(candidate.split()) >= 3:
	            kept.append(candidate)
	    return kept


	def tokenize_words(text: str) -> list[str]:
	    """Split *text* on whitespace into lowercase tokens.

	    Leading and trailing punctuation is stripped from each token; tokens
	    that consist solely of punctuation are dropped. Punctuation inside a
	    token (e.g. an apostrophe or hyphen) is kept.
	    """
	    edge_punctuation = '.,;:!?()[]{}"\'-'
	    tokens = []
	    for chunk in text.split():
	        token = chunk.lower().strip(edge_punctuation)
	        if token:
	            tokens.append(token)
	    return tokens


	def compute_stats(text: str) -> TextStats:
	    """Build the statistical profile of *text*.

	    Returns an all-default ``TextStats`` when the text yields no word
	    tokens. Standard deviations are population (divide-by-N) values.
	    """

	    def _mean_and_spread(values):
	        """Return ``(mean, population std dev)`` of a list of numbers."""
	        size = max(len(values), 1)
	        mean = sum(values) / size
	        spread = float(math.sqrt(sum((v - mean) ** 2 for v in values) / size))
	        return mean, spread

	    profile = TextStats()
	    tokens = tokenize_words(text)
	    sentence_list = tokenize_sentences(text)

	    if not tokens:
	        return profile

	    # --- vocabulary ---
	    occurrences = Counter(tokens)
	    profile.num_words = len(tokens)
	    profile.num_unique_words = len(set(tokens))
	    profile.type_token_ratio = profile.num_unique_words / max(profile.num_words, 1)

	    singletons = sum(1 for times in occurrences.values() if times == 1)
	    profile.hapax_ratio = singletons / max(profile.num_unique_words, 1)

	    # --- word length ---
	    lengths = [len(token) for token in tokens]
	    profile.avg_word_len, profile.std_word_len = _mean_and_spread(lengths)

	    # --- how unevenly occurrences are spread over distinct words ---
	    per_word_counts = list(occurrences.values())
	    profile.avg_word_freq, profile.std_word_freq = _mean_and_spread(per_word_counts)

	    # --- sentence length (burstiness) ---
	    profile.num_sentences = len(sentence_list)
	    words_per_sentence = [len(tokenize_words(sentence)) for sentence in sentence_list]
	    if words_per_sentence:
	        profile.avg_sentence_len, profile.std_sentence_len = _mean_and_spread(
	            words_per_sentence
	        )
	        # Floor the denominator so an all-empty sentence list cannot divide by zero.
	        profile.sentence_len_cv = profile.std_sentence_len / max(
	            profile.avg_sentence_len, 0.01
	        )

	    # --- Flesch Reading Ease ---
	    syllable_total = sum(count_syllables(token) for token in tokens)
	    if profile.num_sentences > 0 and profile.num_words > 0:
	        words_per_sent = profile.num_words / profile.num_sentences
	        syllables_per_word = syllable_total / profile.num_words
	        profile.readability_flesch = (
	            206.835 - 1.015 * words_per_sent - 84.6 * syllables_per_word
	        )

	    return profile


	def count_syllables(word: str) -> int:
	    """Estimate the number of syllables in *word* (always at least 1).

	    Heuristic: words of three characters or fewer count as one syllable;
	    otherwise each maximal run of vowels (``aeiouy``) counts once, and a
	    trailing ``e`` removes one syllable.
	    """
	    lowered = word.lower()
	    if len(lowered) <= 3:
	        return 1

	    vowels = "aeiouy"
	    # Pair every character with its predecessor; the "#" sentinel stands in
	    # for "no previous vowel" at the first position. A vowel run starts
	    # wherever a vowel follows a non-vowel.
	    runs = sum(
	        1
	        for before, current in zip("#" + lowered, lowered)
	        if current in vowels and before not in vowels
	    )

	    if lowered.endswith("e"):
	        runs = max(1, runs - 1)
	    return max(1, runs)


	def compute_dispersion_score(stats: TextStats) -> dict:
	    """Score how 'human-like' a text's dispersion metrics are.

	    Each metric is placed on a 0..1 scale between an "AI-typical" anchor
	    (0) and a "human-typical" anchor (1), clamped to that range and
	    rounded to three decimals. The working assumption is that AI text
	    shows lower TTR, a lower hapax ratio, more uniform sentence lengths
	    and lower word-frequency dispersion than human text.

	    Returns a dict with keys ``ttr``, ``hapax``, ``sent_cv``,
	    ``word_freq_std`` and ``overall_human_likeness`` (the mean of the
	    four per-metric scores, rounded to three decimals).
	    """
	    # (score key, TextStats attribute, AI-typical anchor, human-typical anchor)
	    # Anchors are heuristic reference values, not fitted parameters.
	    reference_points = (
	        ("ttr", "type_token_ratio", 0.35, 0.55),
	        ("hapax", "hapax_ratio", 0.40, 0.55),
	        ("sent_cv", "sentence_len_cv", 0.40, 0.75),
	        ("word_freq_std", "std_word_freq", 1.5, 3.0),
	    )

	    scores = {}
	    for key, attribute, ai_anchor, human_anchor in reference_points:
	        span = max(human_anchor - ai_anchor, 0.001)
	        position = (getattr(stats, attribute) - ai_anchor) / span
	        # 0 = AI-like, 1 = human-like
	        position = max(0.0, min(1.0, position))
	        scores[key] = round(position, 3)

	    # Computed before insertion, so the mean covers only the four metrics.
	    overall = round(sum(scores.values()) / len(scores), 3)
	    scores["overall_human_likeness"] = overall
	    return scores


	def evaluate_copa_results(input_path: str, output_path: str) -> None:
	    """Run statistical evaluation on CoPA results and write a JSON report.

	    Reads a JSON document from *input_path*, computes text statistics and
	    dispersion scores for the ``"original"`` and ``"rewritten"`` text of
	    every entry in its ``"results"`` list, and writes an aggregated report
	    to *output_path*. Progress and key findings are printed to stdout.

	    Args:
	        input_path: Path of the JSON results file to analyse.
	        output_path: Path of the JSON report to write. Its parent directory
	            is created when missing; a bare filename writes to the current
	            working directory.

	    Raises:
	        KeyError: If a result entry lacks ``"original"`` or ``"rewritten"``.

	    When ``"results"`` is missing or empty, a message is printed and no
	    report is written.
	    """
	    with open(input_path, "r", encoding="utf-8") as f:
	        data = json.load(f)

	    results = data.get("results", [])
	    if not results:
	        print("[Eval] No results to evaluate.")
	        return

	    print(f"[Eval] Analyzing {len(results)} samples...")

	    orig_stats_list = [compute_stats(r["original"]) for r in results]
	    rewritten_stats_list = [compute_stats(r["rewritten"]) for r in results]
	    dispersion_orig = [compute_dispersion_score(s) for s in orig_stats_list]
	    dispersion_rewritten = [compute_dispersion_score(s) for s in rewritten_stats_list]

	    # Aggregate
	    def avg_stats(stats_list, attr):
	        # NOTE(review): values <= 0 are excluded from the mean. That skips
	        # empty texts, but it also drops legitimately negative Flesch scores
	        # and zero CVs. Kept as-is to preserve report numbers; confirm intent.
	        vals = [getattr(s, attr) for s in stats_list if getattr(s, attr) > 0]
	        return sum(vals) / max(len(vals), 1)

	    def avg_disp(disp_list, key):
	        vals = [d[key] for d in disp_list]
	        return sum(vals) / max(len(vals), 1)

	    # (report key, TextStats attribute, decimals) -- one table drives both
	    # the "original" and "rewritten" sections so they cannot drift apart.
	    stat_fields = (
	        ("avg_words", "num_words", 1),
	        ("avg_sentences", "num_sentences", 1),
	        ("avg_sentence_len", "avg_sentence_len", 1),
	        ("sentence_len_cv", "sentence_len_cv", 3),
	        ("type_token_ratio", "type_token_ratio", 3),
	        ("hapax_ratio", "hapax_ratio", 3),
	        ("avg_word_len", "avg_word_len", 1),
	        ("std_word_len", "std_word_len", 2),
	        ("avg_word_freq", "avg_word_freq", 1),
	        ("std_word_freq", "std_word_freq", 2),
	        ("readability_flesch", "readability_flesch", 1),
	    )
	    disp_keys = ("ttr", "hapax", "sent_cv", "word_freq_std", "overall_human_likeness")

	    def summarize_stats(stats_list):
	        return {
	            key: round(avg_stats(stats_list, attr), digits)
	            for key, attr, digits in stat_fields
	        }

	    def summarize_disp(disp_list):
	        return {key: round(avg_disp(disp_list, key), 3) for key in disp_keys}

	    report = {
	        "eval_type": "statistical_analysis",
	        "model": data.get("model", data.get("config", {}).get("model", "unknown")),
	        "num_samples": len(results),
	        "status": data.get("status", "unknown"),
	        "elapsed_seconds": data.get("elapsed_seconds", 0),
	        "tokens_per_second": data.get("tokens_per_second", 0),
	        "text_statistics": {
	            "original": summarize_stats(orig_stats_list),
	            "rewritten": summarize_stats(rewritten_stats_list),
	        },
	        "dispersion_analysis": {
	            "original": summarize_disp(dispersion_orig),
	            "rewritten": summarize_disp(dispersion_rewritten),
	        },
	        "key_findings": [],
	    }

	    # Generate key findings
	    disp_o = report["dispersion_analysis"]["original"]
	    disp_r = report["dispersion_analysis"]["rewritten"]
	    text_o = report["text_statistics"]["original"]
	    text_r = report["text_statistics"]["rewritten"]

	    orig_hl = disp_o["overall_human_likeness"]
	    rew_hl = disp_r["overall_human_likeness"]
	    delta_hl = rew_hl - orig_hl

	    findings = [
	        f"Human-likeness: {orig_hl:.3f} -> {rew_hl:.3f} (delta={delta_hl:+.3f})",
	        f"TTR: {disp_o['ttr']:.3f} -> {disp_r['ttr']:.3f} "
	        f"({'increased' if disp_r['ttr'] > disp_o['ttr'] else 'decreased'} vocabulary diversity)",
	        f"Sentence CV: {disp_o['sent_cv']:.3f} -> {disp_r['sent_cv']:.3f} "
	        f"({'more' if disp_r['sent_cv'] > disp_o['sent_cv'] else 'less'} bursty sentence structure)",
	        f"Readability: {text_o['readability_flesch']:.0f} -> {text_r['readability_flesch']:.0f} Flesch "
	        f"({'easier' if text_r['readability_flesch'] > text_o['readability_flesch'] else 'harder'} to read)",
	        f"Word freq dispersion: {text_o['std_word_freq']:.2f} -> {text_r['std_word_freq']:.2f} "
	        f"({'higher' if text_r['std_word_freq'] > text_o['std_word_freq'] else 'lower'} token dispersion)",
	    ]
	    report["key_findings"] = findings

	    # Detector evasion potential (heuristic thresholds on the score delta)
	    evasion_potential = "LOW"
	    if delta_hl > 0.15:
	        evasion_potential = "HIGH"
	    elif delta_hl > 0.05:
	        evasion_potential = "MEDIUM"

	    report["evasion_potential"] = {
	        "rating": evasion_potential,
	        "human_likeness_delta": round(delta_hl, 3),
	        "note": "Statistical heuristic only. Real detector evaluation (Fast-DetectGPT, Binoculars, Pangram) requires Modal GPU — see next phase.",
	    }

	    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
	    # raises FileNotFoundError, so only create a directory when one is named.
	    out_dir = os.path.dirname(output_path)
	    if out_dir:
	        os.makedirs(out_dir, exist_ok=True)
	    with open(output_path, "w", encoding="utf-8") as f:
	        json.dump(report, f, indent=2, ensure_ascii=False)

	    print(f"[Eval] Report saved to {output_path}")
	    print(f"[Eval] Human-likeness: {orig_hl:.3f} -> {rew_hl:.3f} (delta={delta_hl:+.3f})")
	    print(f"[Eval] Evasion potential: {evasion_potential}")
	    for f_ in findings:
	        print(f" - {f_}")


	if __name__ == "__main__":
	    # Usage: eval_statistical.py [INPUT_JSON [OUTPUT_JSON]]
	    cli_args = sys.argv[1:]
	    input_file = cli_args[0] if cli_args else "output/copa_modal_results.json"
	    output_file = (
	        cli_args[1] if len(cli_args) > 1 else "output/eval_statistical_report.json"
	    )
	    evaluate_copa_results(input_file, output_file)