VictorM-Coder committed on
Commit
2b59ac0
·
verified ·
1 Parent(s): f2f742a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -63
app.py CHANGED
@@ -1,113 +1,141 @@
1
  import torch
2
- import torch.nn.functional as F
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import re
 
5
  import pandas as pd
6
  import gradio as gr
7
 
8
- # -----------------------------
9
- # WORKING PUBLIC AI DETECTOR
10
- # -----------------------------
11
- MODEL_NAME = "openai-community/roberta-base-openai-detector"
12
 
13
- # -----------------------------
14
- # LOAD MODEL
15
- # -----------------------------
16
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
17
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
- dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
19
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
20
- model.to(device).eval()
 
 
21
 
22
 
23
- # -----------------------------
24
- # SENTENCE SPLITTER (SAFE)
25
- # -----------------------------
26
  def sentence_split(text):
27
- # Replace newlines with periods to avoid broken sentences
28
  text = text.replace("\n", ". ")
29
-
30
- # Regex split on . ! ? but keep them
31
  sentences = re.split(r'(?<=[.!?])\s+', text)
32
-
33
- # Clean and filter
34
  return [s.strip() for s in sentences if s.strip()]
35
 
36
 
37
- # -----------------------------
38
- # AI DETECTION FUNCTION
39
- # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def classify_text(text):
41
  if not text.strip():
42
  return "⚠️ Please enter some text.", None, None
43
 
44
  sentences = sentence_split(text)
45
- if not sentences:
46
- return "⚠️ No content detected.", None, None
47
-
48
- # Tokenize per sentence
49
- inputs = tokenizer(
50
- sentences,
51
- return_tensors="pt",
52
- padding=True,
53
- truncation=True,
54
- max_length=512
55
- ).to(device)
56
-
57
- with torch.no_grad():
58
- logits = model(**inputs).logits
59
- probs = F.softmax(logits, dim=-1).cpu()
60
- preds = torch.argmax(probs, dim=-1).cpu()
61
 
62
  results = []
63
- highlighted_sentences = []
 
 
64
 
65
- for i, sentence in enumerate(sentences):
66
- pred = preds[i].item()
67
- conf = probs[i, pred].item()
68
 
69
- # Model: 1 = AI, 0 = Human
70
- label = "AI" if pred == 1 else "Human"
71
- conf_text = f"{conf:.2f}"
72
 
73
- results.append([sentence, label, conf_text])
74
 
75
  if label == "AI":
76
- highlighted_sentences.append(f"<p style='color:red; font-weight:bold'>{sentence}</p>")
77
  else:
78
- highlighted_sentences.append(f"<p style='color:green; font-weight:bold'>{sentence}</p>")
79
 
80
- # -----------------------------
81
- # DOCUMENT AI SCORE
82
- # -----------------------------
83
- avg = torch.mean(probs, dim=0)
84
- ai_percent = avg[1].item() * 100
85
 
86
- highlighted_html = "\n".join(highlighted_sentences)
87
- df = pd.DataFrame(results, columns=["Sentence", "Classification", "Confidence"])
88
 
89
- return f"⚖️ Document AI Likelihood: {ai_percent:.1f}%", highlighted_html, df
90
 
91
 
92
- # -----------------------------
93
  # GRADIO UI
94
- # -----------------------------
95
  with gr.Blocks() as demo:
96
- gr.Markdown("## 🧠 Writenix AI Detector (Sentence-Level, Stable Version)")
97
 
98
  text_input = gr.Textbox(
99
  label="Enter text",
100
  lines=14,
101
- placeholder="Paste your essay, article, or content here…"
102
  )
103
 
104
  classify_btn = gr.Button("🚀 Detect AI")
105
 
106
  ai_score = gr.Label(label="Overall AI Likelihood")
107
  highlighted = gr.HTML()
108
- table = gr.Dataframe(headers=["Sentence", "Classification", "Confidence"], wrap=True)
109
 
110
- classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])
111
 
112
  if __name__ == "__main__":
113
  demo.launch()
 
1
  import torch
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
3
  import re
4
+ import numpy as np
5
  import pandas as pd
6
  import gradio as gr
7
 
8
+ # ----------------------------------------------------
9
+ # LOAD CAUSAL LM (DetectGPT requires a generative LM)
10
+ # ----------------------------------------------------
11
# Checkpoint used as the scoring language model for the perplexity measurements.
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"

# Prefer the GPU when one is visible; tokenized inputs are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half precision on GPU, full precision on CPU. Weight placement is delegated
# to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32 if device.type != "cuda" else torch.float16,
    device_map="auto",
)
model = model.eval()  # inference only
20
 
21
 
22
+ # ----------------------------------------------------
23
+ # SENTENCE SPLITTER
24
+ # ----------------------------------------------------
25
def sentence_split(text):
    """Split ``text`` into a list of trimmed, non-empty sentences.

    Line breaks are treated as hard sentence boundaries, and each line is then
    split after ``.``, ``!`` or ``?`` followed by whitespace.

    Unlike substituting ``". "`` for every newline, this never fabricates
    punctuation: ``"A.\\nB."`` no longer yields ``"A.."``, and blank lines or
    Windows line endings no longer yield stray ``"."`` sentences.

    Args:
        text: Raw input text; may contain blank lines and ``\\r\\n`` endings.

    Returns:
        list[str]: Sentences in document order, with terminal punctuation kept.
        Empty when ``text`` holds only whitespace.
    """
    sentences = []
    # splitlines() handles \n, \r\n and \r alike, so no "\r" residue survives.
    for line in text.splitlines():
        for piece in re.split(r'(?<=[.!?])\s+', line.strip()):
            piece = piece.strip()
            if piece:
                sentences.append(piece)
    return sentences
29
 
30
 
31
+ # ----------------------------------------------------
32
+ # PERPLEXITY FUNCTION
33
+ # ----------------------------------------------------
34
def perplexity(sentence):
    """Return ``exp(loss)`` for ``sentence`` under the module-level causal LM.

    The sentence is tokenized, moved to ``device``, and fed to ``model`` with
    its own input ids as labels; the loss the model reports is exponentiated
    and returned as a Python float.
    NOTE(review): for Hugging Face causal LMs that loss is presumably the mean
    next-token cross-entropy, which would make this a standard perplexity.
    Confirm against the model class in use.
    """
    encoded = tokenizer(sentence, return_tensors="pt").to(device)
    # Inference only: no autograd graph is needed for a loss readout.
    with torch.no_grad():
        result = model(labels=encoded["input_ids"], **encoded)
        ppl = torch.exp(result.loss)
    return ppl.item()
40
+
41
+
42
+ # ----------------------------------------------------
43
+ # SIMPLE TEXT PERTURBATION (whitespace noise; no synonym substitution)
44
+ # ----------------------------------------------------
45
def perturb(text):
    """Return a lightly perturbed copy of ``text``.

    Texts with fewer than four whitespace-separated words are returned
    unchanged. Otherwise one word is picked uniformly at random and a space is
    appended to it before the words are re-joined with single spaces, so the
    result has one doubled space (or a trailing space if the last word was
    picked) and any other whitespace runs are collapsed.

    NOTE(review): this only injects whitespace; it does not rewrite or
    substitute any words.
    """
    tokens = text.split()
    if len(tokens) >= 4:
        pos = np.random.randint(0, len(tokens))
        tokens[pos] += " "
        return " ".join(tokens)
    # Too short to perturb meaningfully: hand the input back untouched.
    return text
54
+
55
+
56
+ # ----------------------------------------------------
57
+ # DETECTGPT SCORE
58
+ # ----------------------------------------------------
59
def detectgpt_score(sentence, perturbations=5):
    """Score ``sentence`` by how much perturbing it changes its perplexity.

    The score is ``mean(perplexity(perturbed copies)) - perplexity(sentence)``.
    The caller treats a positive score as "AI" and anything else as "Human".

    Scoring is best-effort: a failing or non-finite perplexity measurement is
    logged and skipped rather than raised, so one bad sentence cannot abort a
    whole document.

    Args:
        sentence: Text to score.
        perturbations: Number of perturbed copies to evaluate.

    Returns:
        float: The perplexity gap, or ``0.0`` when the original sentence or
        every perturbed copy could not be measured.
    """
    # Function-scope imports keep this block self-contained.
    import logging
    import math

    log = logging.getLogger(__name__)

    try:
        orig = perplexity(sentence)
    except Exception:  # not a bare except: Ctrl-C / SystemExit must propagate
        log.warning("perplexity failed for original sentence; scoring 0",
                    exc_info=True)
        return 0.0
    if not math.isfinite(orig):
        # A NaN/inf baseline would poison the document-level average.
        return 0.0

    perturbed_scores = []
    for _ in range(perturbations):
        candidate = perturb(sentence)
        try:
            pp = perplexity(candidate)
        except Exception:
            log.warning("perplexity failed for perturbed sentence; skipping",
                        exc_info=True)
            continue
        if math.isfinite(pp):
            perturbed_scores.append(pp)

    if not perturbed_scores:
        return 0.0

    # Plain float so every return path has the same type.
    return float(np.mean(perturbed_scores) - orig)
78
+
79
+
80
+ # ----------------------------------------------------
81
+ # MAIN CLASSIFIER
82
+ # ----------------------------------------------------
83
def classify_text(text):
    """Classify each sentence of ``text`` and build the three UI outputs.

    Every sentence is scored with ``detectgpt_score``; a score above zero is
    labelled "AI", anything else "Human".

    Args:
        text: Raw text from the input box. ``None``, empty, or
            whitespace-only input yields a warning message.

    Returns:
        tuple: ``(summary, html, df)`` where
            * ``summary`` is the document-level likelihood line (or a warning),
            * ``html`` has one ``<p>`` per sentence, red for AI and green for
              Human, with the sentence text HTML-escaped,
            * ``df`` is a DataFrame with columns Sentence / Label / Score.
        ``html`` and ``df`` are ``None`` when there is nothing to classify.
    """
    # Imported under a distinct name so it cannot clash with local variables.
    from html import escape

    if not text or not text.strip():
        return "⚠️ Please enter some text.", None, None

    sentences = sentence_split(text)
    if not sentences:
        # Guards np.mean([]) below, which would produce NaN.
        return "⚠️ No content detected.", None, None

    results = []
    highlighted_parts = []  # distinct from the module-level `highlighted` component
    detectgpt_scores = []

    for s in sentences:
        score = detectgpt_score(s)
        detectgpt_scores.append(score)

        label = "AI" if score > 0 else "Human"
        conf = abs(score)
        results.append([s, label, f"{conf:.4f}"])

        # User text goes into raw HTML: escape it so pasted markup is shown
        # literally instead of being interpreted by the browser.
        color = "red" if label == "AI" else "green"
        highlighted_parts.append(
            f"<p style='color:{color};font-weight:bold'>{escape(s)}</p>"
        )

    # -------------------------
    # DOCUMENT-LEVEL SCORE
    # -------------------------
    avg_score = np.mean(detectgpt_scores)
    # Maps the average score linearly so -1 -> 0 % and +1 -> 100 %, clamped.
    # NOTE(review): scores are perplexity differences and are not bounded to
    # [-1, 1], so this will often saturate - confirm the intended calibration.
    doc_ai_percent = max(0, min(100, (avg_score + 1) * 50))

    df = pd.DataFrame(results, columns=["Sentence", "Label", "Score"])
    html = "\n".join(highlighted_parts)

    return f"⚖️ Document AI Likelihood: {doc_ai_percent:.1f}%", html, df
118
 
119
 
120
+ # ----------------------------------------------------
121
  # GRADIO UI
122
+ # ----------------------------------------------------
123
# Layout: heading, input box, action button, then the three result widgets.
# Components render in creation order, so the order below is significant.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Writenix DetectGPT (Turnitin-like Detector)")

    text_input = gr.Textbox(
        lines=14,
        label="Enter text",
        placeholder="Paste your essay here…",
    )

    classify_btn = gr.Button("🚀 Detect AI")

    ai_score = gr.Label(label="Overall AI Likelihood")
    highlighted = gr.HTML()
    table = gr.Dataframe(wrap=True, headers=["Sentence", "Label", "Score"])

    # classify_text returns (summary, html, dataframe), matching `outputs`.
    classify_btn.click(
        fn=classify_text,
        inputs=text_input,
        outputs=[ai_score, highlighted, table],
    )

# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()