VictorM-Coder committed on
Commit
2bb6fdc
·
verified ·
1 Parent(s): 2b59ac0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -35
app.py CHANGED
@@ -6,17 +6,17 @@ import pandas as pd
6
  import gradio as gr
7
 
8
  # ----------------------------------------------------
9
- # LOAD CAUSAL LM (DetectGPT requires a generative LM)
10
  # ----------------------------------------------------
11
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half precision on CUDA, full precision on CPU; weight placement is
# delegated to device_map="auto". The model is switched to eval mode because
# it is only used for scoring.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
    device_map="auto",
)
model = model.eval()
20
 
21
 
22
  # ----------------------------------------------------
@@ -34,22 +34,19 @@ def sentence_split(text):
34
  def perplexity(sentence):
35
  inputs = tokenizer(sentence, return_tensors="pt").to(device)
36
  with torch.no_grad():
37
- outputs = model(**inputs, labels=inputs["input_ids"])
38
- loss = outputs.loss
39
- return torch.exp(loss).item()
40
 
41
 
42
  # ----------------------------------------------------
43
- # SIMPLE TEXT PERTURBATION (Synonym-like noise)
44
  # ----------------------------------------------------
45
  def perturb(text):
46
  words = text.split()
47
  if len(words) < 4:
48
- return text # too short to perturb
49
-
50
  idx = np.random.randint(0, len(words))
51
- words[idx] = words[idx] + " " # small noise (DetectGPT paper trick)
52
-
53
  return " ".join(words)
54
 
55
 
@@ -58,23 +55,23 @@ def perturb(text):
58
  # ----------------------------------------------------
59
  def detectgpt_score(sentence, perturbations=5):
60
  try:
61
- orig = perplexity(sentence)
62
  except:
63
- return 0 # fallback
64
 
65
- perturbed_scores = []
66
  for _ in range(perturbations):
67
  p = perturb(sentence)
68
  try:
69
- pp = perplexity(p)
70
- perturbed_scores.append(pp)
71
  except:
72
- continue
73
 
74
- if not perturbed_scores:
75
  return 0
76
 
77
- return np.mean(perturbed_scores) - orig # DetectGPT signal
 
78
 
79
 
80
  # ----------------------------------------------------
@@ -85,43 +82,37 @@ def classify_text(text):
85
  return "⚠️ Please enter some text.", None, None
86
 
87
  sentences = sentence_split(text)
88
-
89
  results = []
90
  highlighted = []
91
-
92
- detectgpt_scores = []
93
 
94
  for s in sentences:
95
  score = detectgpt_score(s)
96
- detectgpt_scores.append(score)
97
 
98
  label = "AI" if score > 0 else "Human"
99
- conf = abs(score)
100
-
101
- results.append([s, label, f"{conf:.4f}"])
102
 
103
  if label == "AI":
104
  highlighted.append(f"<p style='color:red;font-weight:bold'>{s}</p>")
105
  else:
106
  highlighted.append(f"<p style='color:green;font-weight:bold'>{s}</p>")
107
 
108
- # -------------------------
109
- # DOCUMENT-LEVEL SCORE
110
- # -------------------------
111
- avg_score = np.mean(detectgpt_scores)
112
- doc_ai_percent = max(0, min(100, (avg_score + 1) * 50))
113
 
114
  df = pd.DataFrame(results, columns=["Sentence", "Label", "Score"])
115
  html = "\n".join(highlighted)
116
 
117
- return f"⚖️ Document AI Likelihood: {doc_ai_percent:.1f}%", html, df
118
 
119
 
120
  # ----------------------------------------------------
121
  # GRADIO UI
122
  # ----------------------------------------------------
123
  with gr.Blocks() as demo:
124
- gr.Markdown("## 🧠 Writenix DetectGPT (Turnitin-like Detector)")
125
 
126
  text_input = gr.Textbox(
127
  label="Enter text",
 
6
  import gradio as gr
7
 
8
  # ----------------------------------------------------
9
+ # LOAD CAUSAL LM (GPT-J 6B = Best balance)
10
  # ----------------------------------------------------
11
MODEL_NAME = "EleutherAI/gpt-j-6B"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half precision on CUDA, full precision on CPU. The model is then moved to
# the selected device and switched to eval mode because it is only used for
# scoring.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
)
model = model.to(device)
model = model.eval()
 
20
 
21
 
22
  # ----------------------------------------------------
 
34
  def perplexity(sentence):
35
  inputs = tokenizer(sentence, return_tensors="pt").to(device)
36
  with torch.no_grad():
37
+ out = model(**inputs, labels=inputs["input_ids"])
38
+ return torch.exp(out.loss).item()
 
39
 
40
 
41
  # ----------------------------------------------------
42
+ # SIMPLE TEXT PERTURBATION
43
  # ----------------------------------------------------
44
  def perturb(text):
45
  words = text.split()
46
  if len(words) < 4:
47
+ return text
 
48
  idx = np.random.randint(0, len(words))
49
+ words[idx] += " "
 
50
  return " ".join(words)
51
 
52
 
 
55
  # ----------------------------------------------------
56
  def detectgpt_score(sentence, perturbations=5):
57
  try:
58
+ base = perplexity(sentence)
59
  except:
60
+ return 0
61
 
62
+ pert_scores = []
63
  for _ in range(perturbations):
64
  p = perturb(sentence)
65
  try:
66
+ pert_scores.append(perplexity(p))
 
67
  except:
68
+ pass
69
 
70
+ if not pert_scores:
71
  return 0
72
 
73
+ # DetectGPT signal
74
+ return np.mean(pert_scores) - base
75
 
76
 
77
  # ----------------------------------------------------
 
82
  return "⚠️ Please enter some text.", None, None
83
 
84
  sentences = sentence_split(text)
 
85
  results = []
86
  highlighted = []
87
+ scores = []
 
88
 
89
  for s in sentences:
90
  score = detectgpt_score(s)
91
+ scores.append(score)
92
 
93
  label = "AI" if score > 0 else "Human"
94
+ results.append([s, label, f"{abs(score):.4f}"])
 
 
95
 
96
  if label == "AI":
97
  highlighted.append(f"<p style='color:red;font-weight:bold'>{s}</p>")
98
  else:
99
  highlighted.append(f"<p style='color:green;font-weight:bold'>{s}</p>")
100
 
101
+ # Document-level score rescaled 0–100%
102
+ avg = np.mean(scores)
103
+ doc_ai = max(0, min(100, (avg + 1) * 50))
 
 
104
 
105
  df = pd.DataFrame(results, columns=["Sentence", "Label", "Score"])
106
  html = "\n".join(highlighted)
107
 
108
+ return f"⚖️ Document AI Likelihood: {doc_ai:.1f}%", html, df
109
 
110
 
111
  # ----------------------------------------------------
112
  # GRADIO UI
113
  # ----------------------------------------------------
114
  with gr.Blocks() as demo:
115
+ gr.Markdown("## 🧠 Writenix DetectGPT (GPT-J-6B Version)")
116
 
117
  text_input = gr.Textbox(
118
  label="Enter text",