Jay-Rajput commited on
Commit
8c0c5f5
·
1 Parent(s): a74afb3

fixaidetector

Browse files
Files changed (1) hide show
  1. text_detector.py +111 -85
text_detector.py CHANGED
@@ -1,139 +1,165 @@
1
  import math
2
- import statistics
3
  import numpy as np
4
  import torch
5
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
 
 
 
 
6
  from collections import Counter
7
 
8
 
9
class AITextDetector:
    """
    Advanced AI Text Detector:

    - Transformer classifier for AI vs Human
    - Metrics: perplexity, burstiness, repetition, semantic smoothness
    - Category distribution (4-way classification for interpretability)
    """

    def __init__(self, model_name="roberta-base-openai-detector", device=None):
        """Load the classifier and the GPT-2 language model used for perplexity.

        Args:
            model_name: HF model id of the AI-vs-human sequence classifier.
            device: explicit torch device string; defaults to CUDA when available.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        # BUG FIX: perplexity must use GPT-2's own tokenizer. The old code
        # reused the RoBERTa classifier tokenizer, feeding GPT-2 token ids
        # from the wrong vocabulary.
        self.lm_tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.lm_model = AutoModelForCausalLM.from_pretrained("gpt2")

        self.device = device if device else ("cuda" if torch.cuda.is_available() else "cpu")

        self.model.to(self.device)
        self.lm_model.to(self.device)
        self.model.eval()
        self.lm_model.eval()  # BUG FIX: LM was left in train mode (dropout active)

    def _compute_perplexity(self, text: str) -> float:
        """Approximate perplexity of *text* under GPT-2 (exp of mean NLL)."""
        encodings = self.lm_tokenizer(text, return_tensors="pt", truncation=True).to(self.device)
        with torch.no_grad():
            outputs = self.lm_model(**encodings, labels=encodings.input_ids)
            loss = outputs.loss.item()
        return math.exp(loss)

    def _compute_burstiness(self, text: str) -> float:
        """Burstiness = population variance / mean of sentence word counts."""
        sentences = [s.strip() for s in text.split(".") if s.strip()]
        lengths = [len(s.split()) for s in sentences]
        if len(lengths) < 2:
            # Variance is meaningless for fewer than two sentences.
            return 0.0
        return statistics.pvariance(lengths) / (np.mean(lengths) + 1e-8)

    def _compute_repetition_score(self, text: str) -> float:
        """Fraction of word occurrences that repeat an earlier word.

        NOTE(review): ``isalpha()`` silently drops tokens carrying punctuation
        ("don't", "word,"), so scores skew low on punctuated prose — kept for
        behavioral compatibility.
        """
        words = [w.lower() for w in text.split() if w.isalpha()]
        if not words:
            return 0.0
        word_counts = Counter(words)
        repetition = sum(c - 1 for c in word_counts.values() if c > 1) / len(words)
        return repetition

    def _compute_semantic_smoothness(self, text: str) -> float:
        """Mean cosine similarity between consecutive sentence embeddings."""
        sentences = [s.strip() for s in text.split(".") if s.strip()]
        if len(sentences) < 2:
            return 1.0

        # BUG FIX: sentences tokenize to different lengths, so building one
        # torch.tensor from the ragged Python lists raised ValueError. Pad
        # every id sequence to a common length and mean-pool only over the
        # real (non-pad) positions.
        encoded = [self.tokenizer.encode(s, truncation=True, max_length=32) for s in sentences]
        max_len = max(len(ids) for ids in encoded)
        pad_id = self.tokenizer.pad_token_id or 0
        ids = torch.tensor(
            [seq + [pad_id] * (max_len - len(seq)) for seq in encoded]
        ).to(self.device)
        mask = torch.tensor(
            [[1.0] * len(seq) + [0.0] * (max_len - len(seq)) for seq in encoded]
        ).to(self.device)

        token_embs = self.model.base_model.get_input_embeddings()(ids)
        # Masked mean pooling: pad positions must not dilute the average.
        summed = (token_embs * mask.unsqueeze(-1)).sum(dim=1)
        embeddings = (summed / mask.sum(dim=1, keepdim=True)).detach().cpu().numpy()

        sims = []
        for i in range(len(embeddings) - 1):
            v1, v2 = embeddings[i], embeddings[i + 1]
            cos = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + 1e-8)
            sims.append(cos)
        return float(np.mean(sims))

    def detect(self, text: str) -> dict:
        """Run detection and return a structured report.

        Returns a dict with keys: summary, overall_ai_probability,
        category_distribution, metrics, interpretation.
        """
        # Model classification
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(self.device)
        with torch.no_grad():
            logits = self.model(**inputs).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
        ai_prob = float(probs[1]) if len(probs) > 1 else 0.5

        # Heuristic metrics
        perplexity = self._compute_perplexity(text)
        burstiness = self._compute_burstiness(text)
        repetition = self._compute_repetition_score(text)
        smoothness = self._compute_semantic_smoothness(text)

        # 4-category distribution (heuristic scaling from ai_prob + metrics).
        # BUG FIX: normalize on RAW values and round once — the old code
        # rounded each share to 0.1 before normalizing, so the final shares
        # did not reliably sum to 100.
        distribution = {
            "AI-generated": ai_prob * 100 * (1 - repetition),
            "AI-generated & AI-refined": ai_prob * 100 * repetition,
            "Human-written & AI-refined": (1 - ai_prob) * 100 * smoothness,
            "Human-written": (1 - ai_prob) * 100 * (1 - smoothness),
        }
        total = sum(distribution.values())
        if total > 0:
            distribution = {k: round(v / total * 100, 1) for k, v in distribution.items()}
        else:
            distribution = {k: 0.0 for k in distribution}

        overall_ai_probability = round(ai_prob, 2)
        summary = f"{distribution['AI-generated']}% of text is likely AI"

        return {
            "summary": summary,
            "overall_ai_probability": overall_ai_probability,
            "category_distribution": distribution,
            "metrics": {
                "perplexity": round(perplexity, 2),
                "burstiness": round(burstiness, 3),
                "repetition_score": round(repetition, 3),
                "semantic_smoothness": round(smoothness, 3),
                "ai_probability": overall_ai_probability,
            },
            "interpretation": (
                "This detector uses structural patterns (perplexity, burstiness, repetition, semantic smoothness) "
                "to estimate the likelihood of AI authorship. Results are probabilistic, not definitive. "
                "Always apply judgment."
            ),
        }
 
1
  import math
2
+ import re
3
  import numpy as np
4
  import torch
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModelForSequenceClassification,
8
+ AutoModelForCausalLM,
9
+ )
10
  from collections import Counter
11
 
12
 
13
class AITextDetector:
    """
    AI Text Detector

    - Transformer classifier for AI vs Human
    - Metrics: perplexity, burstiness, repetition, semantic smoothness
    - Returns AI-vs-Human probability + category distribution
    """

    def __init__(self, model_name="roberta-base-openai-detector", device=None):
        """Load models onto *device* (CUDA when available, else CPU).

        Args:
            model_name: HF model id of the AI-vs-human sequence classifier.
            device: explicit torch device string; auto-detected when None.
        """
        # Device setup
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        # Classifier model & tokenizer
        self.classifier_tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(self.device)
        self.model.eval()

        # Language model for perplexity
        self.lm_tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.lm_model = AutoModelForCausalLM.from_pretrained("gpt2").to(self.device)
        self.lm_model.eval()

    # ------------------ Metrics ------------------
    def _compute_perplexity(self, text: str, max_length: int = 512) -> float:
        """Perplexity of *text* under GPT-2, clamped at 1e4."""
        encodings = self.lm_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=max_length,
        ).to(self.device)

        with torch.no_grad():
            outputs = self.lm_model(**encodings, labels=encodings.input_ids)
            loss = outputs.loss.item()

        # Clamp to avoid overflow on degenerate inputs
        return float(min(math.exp(loss), 1e4))

    def _compute_burstiness(self, text: str) -> float:
        """Population variance of sentence word counts (0.0 for < 2 sentences)."""
        sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
        if len(sentences) < 2:
            return 0.0
        lengths = [len(s.split()) for s in sentences]
        return float(np.var(lengths))

    def _compute_repetition_score(self, text: str) -> float:
        """Proportion of word occurrences that duplicate an earlier word."""
        words = [w.lower() for w in re.findall(r"\b\w+\b", text)]
        if not words:
            return 0.0
        counts = Counter(words)
        repeated = sum(c - 1 for c in counts.values() if c > 1)
        return repeated / len(words)

    def _compute_semantic_smoothness(self, text: str) -> float:
        """
        Semantic smoothness = avg cosine similarity between consecutive sentence
        embeddings (mean-pooled last hidden states of the classifier).
        """
        sentences = [s.strip() for s in re.split(r"[.!?]", text) if s.strip()]
        if len(sentences) < 2:
            return 1.0

        embeddings = []
        for sentence in sentences:
            # padding=True on a single sequence is a no-op, so it is omitted.
            enc = self.classifier_tokenizer(
                sentence,
                return_tensors="pt",
                truncation=True,
                max_length=128,
            ).to(self.device)

            with torch.no_grad():
                out = self.model(**enc, output_hidden_states=True)
            # BUG FIX: keep each embedding 1-D. The old code kept (1, H)
            # arrays, so np.dot(a, b.T) was a (1, 1) array and float() on it
            # is deprecated (NumPy >= 1.25) and slated to raise.
            hidden = out.hidden_states[-1]  # last layer, (1, seq, H)
            embeddings.append(hidden.mean(dim=1).squeeze(0).cpu().numpy())

        similarities = []
        for a, b in zip(embeddings, embeddings[1:]):
            denom = np.linalg.norm(a) * np.linalg.norm(b)
            if denom > 0:
                similarities.append(float(np.dot(a, b)) / denom)
        return float(np.mean(similarities)) if similarities else 1.0

    # ------------------ Main detection ------------------
    def detect(self, text: str) -> dict:
        """Run the detection pipeline.

        Returns a dict with keys: ai_probability, metrics, distribution,
        final_label.
        """
        # Empty text case
        if not text.strip():
            return {
                "ai_probability": 0.0,
                "metrics": {},
                "distribution": {},
                "final_label": "empty",
            }

        # Classifier prediction
        inputs = self.classifier_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        ).to(self.device)

        with torch.no_grad():
            logits = self.model(**inputs).logits
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

        # BUG FIX: don't assume two logits — the old unconditional unpack of
        # probs[0]/probs[1] raised IndexError on a single-label head.
        if len(probs) > 1:
            human_prob, ai_prob = float(probs[0]), float(probs[1])
        else:
            human_prob = ai_prob = 0.5

        # Extra metrics
        perplexity = self._compute_perplexity(text)
        burstiness = self._compute_burstiness(text)
        repetition = self._compute_repetition_score(text)
        smoothness = self._compute_semantic_smoothness(text)

        # Heuristic 4-way split. BUG FIX: normalize on RAW values and round
        # once — rounding each share first, as the old code did, could leave
        # the final shares not summing to 100.
        raw = {
            "Human-written": human_prob * 100,
            "AI-generated": ai_prob * 100 * (1 - repetition),
            "AI-generated & AI-refined": ai_prob * 100 * repetition,
            "Mixed": ai_prob * 100 * (1 - smoothness),
        }
        total = sum(raw.values())
        if total > 0:
            distribution = {k: round(v / total * 100, 2) for k, v in raw.items()}
        else:
            distribution = {k: 0.0 for k in raw}

        # Final label = dominant category
        final_label = max(distribution, key=distribution.get)

        return {
            "ai_probability": round(ai_prob, 4),
            "metrics": {
                "perplexity": round(perplexity, 3),
                "burstiness": round(burstiness, 3),
                "repetition": round(repetition, 3),
                "semantic_smoothness": round(smoothness, 3),
            },
            "distribution": distribution,
            "final_label": final_label,
        }