# humanizer/text_detector.py
import math
import statistics
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
from collections import Counter
class AITextDetector:
    """
    Advanced AI Text Detector (2025-ready):
    - Transformer classifier for AI vs Human
    - Metrics: perplexity, burstiness, repetition, semantic smoothness
    - Category distribution (4-way classification for interpretability)
    """

    def __init__(self, model_name="roberta-base-openai-detector", device=None):
        """Load the classifier and the GPT-2 language model used for perplexity.

        Args:
            model_name: Hugging Face model id of the AI-vs-human classifier.
            device: explicit torch device string; auto-detects CUDA when None.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        # GPT-2 needs its own tokenizer: the classifier's tokenizer has a
        # different vocabulary, so reusing it for the LM would produce token
        # ids that mean something else entirely and a meaningless perplexity.
        self.lm_tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.lm_model = AutoModelForCausalLM.from_pretrained("gpt2")
        self.device = device if device else ("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.lm_model.to(self.device)
        self.model.eval()
        # Was missing: without eval(), GPT-2 dropout adds noise to perplexity.
        self.lm_model.eval()

    def _compute_perplexity(self, text: str) -> float:
        """Approximate perplexity of *text* under GPT-2: exp(mean NLL)."""
        # Use the GPT-2 tokenizer (not the classifier's) so ids match the LM vocab.
        encodings = self.lm_tokenizer(text, return_tensors="pt", truncation=True).to(self.device)
        with torch.no_grad():
            outputs = self.lm_model(**encodings, labels=encodings.input_ids)
        return math.exp(outputs.loss.item())

    def _compute_burstiness(self, text: str) -> float:
        """
        Burstiness = variance / mean of sentence lengths.

        Returns 0.0 when there are fewer than two sentences (variance undefined).
        """
        sentences = [s.strip() for s in text.split(".") if s.strip()]
        lengths = [len(s.split()) for s in sentences]
        if len(lengths) < 2:
            return 0.0
        return statistics.pvariance(lengths) / (np.mean(lengths) + 1e-8)

    def _compute_repetition_score(self, text: str) -> float:
        """
        Measures how often words repeat (fraction of surplus occurrences).
        High repetition = more likely AI. Returns 0.0 for empty input.
        """
        words = [w.lower() for w in text.split() if w.isalpha()]
        if not words:
            return 0.0
        word_counts = Counter(words)
        return sum(c - 1 for c in word_counts.values() if c > 1) / len(words)

    def _compute_semantic_smoothness(self, text: str) -> float:
        """
        Semantic smoothness = mean cosine similarity between consecutive
        sentence embeddings. Higher = more consistent flow (AI often too smooth).
        """
        sentences = [s.strip() for s in text.split(".") if s.strip()]
        if len(sentences) < 2:
            return 1.0
        # FIX: sentences tokenize to different lengths, so building a tensor
        # from raw encode() lists raised on ragged input. Batch-tokenize with
        # padding instead, then mask out the pad positions.
        enc = self.tokenizer(
            sentences, return_tensors="pt", truncation=True, max_length=32, padding=True
        ).to(self.device)
        with torch.no_grad():
            token_embs = self.model.base_model.get_input_embeddings()(enc.input_ids)
        # Attention-mask-weighted mean so padding tokens do not dilute vectors.
        mask = enc.attention_mask.unsqueeze(-1).float()
        sent_embs = ((token_embs * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-8))
        sent_embs = sent_embs.detach().cpu().numpy()
        sims = []
        for i in range(len(sent_embs) - 1):
            v1, v2 = sent_embs[i], sent_embs[i + 1]
            sims.append(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + 1e-8))
        return float(np.mean(sims))

    def detect(self, text: str) -> dict:
        """
        Run detection and return a structured report dict with keys:
        summary, overall_ai_probability, category_distribution, metrics,
        interpretation.
        """
        # Model classification
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(self.device)
        with torch.no_grad():
            logits = self.model(**inputs).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
        # NOTE(review): this assumes index 1 = "AI". Some releases of
        # roberta-base-openai-detector map index 0 -> "Fake" (AI) and
        # index 1 -> "Real" -- confirm the checkpoint's id2label before
        # trusting this probability.
        ai_prob = float(probs[1]) if len(probs) > 1 else 0.5

        # Compute structural metrics
        perplexity = self._compute_perplexity(text)
        burstiness = self._compute_burstiness(text)
        repetition = self._compute_repetition_score(text)
        smoothness = self._compute_semantic_smoothness(text)

        # Create 4-category distribution (mock scaling from ai_prob + heuristics)
        distribution = {
            "AI-generated": round(ai_prob * 100 * (1 - repetition), 1),
            "AI-generated & AI-refined": round(ai_prob * 100 * repetition, 1),
            "Human-written & AI-refined": round((1 - ai_prob) * 100 * smoothness, 1),
            "Human-written": round((1 - ai_prob) * 100 * (1 - smoothness), 1)
        }
        # Normalize so they sum to ~100 (rounding may leave a small residual).
        total = sum(distribution.values())
        if total > 0:
            distribution = {k: round(v / total * 100, 1) for k, v in distribution.items()}

        overall_ai_probability = round(ai_prob, 2)
        summary = f"{distribution['AI-generated']}% of text is likely AI"
        return {
            "summary": summary,
            "overall_ai_probability": overall_ai_probability,
            "category_distribution": distribution,
            "metrics": {
                "perplexity": round(perplexity, 2),
                "burstiness": round(burstiness, 3),
                "repetition_score": round(repetition, 3),
                "semantic_smoothness": round(smoothness, 3),
                "ai_probability": overall_ai_probability
            },
            "interpretation": (
                "This detector uses structural patterns (perplexity, burstiness, repetition, semantic smoothness) "
                "to estimate the likelihood of AI authorship. Results are probabilistic, not definitive. "
                "Always apply judgment."
            )
        }