Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,16 +6,15 @@ import pandas as pd
|
|
| 6 |
import gradio as gr
|
| 7 |
|
| 8 |
# ----------------------------------------------------
|
| 9 |
-
# LOAD CAUSAL LM (
|
| 10 |
# ----------------------------------------------------
|
| 11 |
-
MODEL_NAME = "
|
| 12 |
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 14 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 15 |
|
| 16 |
model = AutoModelForCausalLM.from_pretrained(
|
| 17 |
-
MODEL_NAME
|
| 18 |
-
torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
|
| 19 |
).to(device).eval()
|
| 20 |
|
| 21 |
|
|
@@ -29,17 +28,17 @@ def sentence_split(text):
|
|
| 29 |
|
| 30 |
|
| 31 |
# ----------------------------------------------------
|
| 32 |
-
# PERPLEXITY FUNCTION
|
| 33 |
# ----------------------------------------------------
|
| 34 |
def perplexity(sentence):
|
| 35 |
inputs = tokenizer(sentence, return_tensors="pt").to(device)
|
| 36 |
with torch.no_grad():
|
| 37 |
out = model(**inputs, labels=inputs["input_ids"])
|
| 38 |
-
return torch.exp(out.loss)
|
| 39 |
|
| 40 |
|
| 41 |
# ----------------------------------------------------
|
| 42 |
-
# SIMPLE TEXT PERTURBATION
|
| 43 |
# ----------------------------------------------------
|
| 44 |
def perturb(text):
|
| 45 |
words = text.split()
|
|
@@ -65,12 +64,13 @@ def detectgpt_score(sentence, perturbations=5):
|
|
| 65 |
try:
|
| 66 |
pert_scores.append(perplexity(p))
|
| 67 |
except:
|
| 68 |
-
|
| 69 |
|
| 70 |
if not pert_scores:
|
| 71 |
return 0
|
| 72 |
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
# ----------------------------------------------------
|
|
@@ -90,13 +90,19 @@ def classify_text(text):
|
|
| 90 |
scores.append(score)
|
| 91 |
|
| 92 |
label = "AI" if score > 0 else "Human"
|
|
|
|
| 93 |
results.append([s, label, f"{abs(score):.4f}"])
|
| 94 |
|
| 95 |
if label == "AI":
|
| 96 |
-
highlighted.append(
|
|
|
|
|
|
|
| 97 |
else:
|
| 98 |
-
highlighted.append(
|
|
|
|
|
|
|
| 99 |
|
|
|
|
| 100 |
avg = np.mean(scores)
|
| 101 |
doc_ai = max(0, min(100, (avg + 1) * 50))
|
| 102 |
|
|
@@ -110,7 +116,7 @@ def classify_text(text):
|
|
| 110 |
# GRADIO UI
|
| 111 |
# ----------------------------------------------------
|
| 112 |
with gr.Blocks() as demo:
|
| 113 |
-
gr.Markdown("## 🧠 Writenix DetectGPT (
|
| 114 |
|
| 115 |
text_input = gr.Textbox(
|
| 116 |
label="Enter text",
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
|
| 8 |
# ----------------------------------------------------
|
| 9 |
+
# LOAD CAUSAL LM (distilGPT2 = FAST + LIGHT)
|
| 10 |
# ----------------------------------------------------
|
| 11 |
+
MODEL_NAME = "distilgpt2"
|
| 12 |
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 14 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 15 |
|
| 16 |
model = AutoModelForCausalLM.from_pretrained(
|
| 17 |
+
MODEL_NAME
|
|
|
|
| 18 |
).to(device).eval()
|
| 19 |
|
| 20 |
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
# ----------------------------------------------------
|
| 31 |
+
# PERPLEXITY FUNCTION (very fast on distilgpt2)
|
| 32 |
# ----------------------------------------------------
|
| 33 |
def perplexity(sentence):
|
| 34 |
inputs = tokenizer(sentence, return_tensors="pt").to(device)
|
| 35 |
with torch.no_grad():
|
| 36 |
out = model(**inputs, labels=inputs["input_ids"])
|
| 37 |
+
return float(torch.exp(out.loss))
|
| 38 |
|
| 39 |
|
| 40 |
# ----------------------------------------------------
|
| 41 |
+
# SIMPLE TEXT PERTURBATION (DetectGPT trick)
|
| 42 |
# ----------------------------------------------------
|
| 43 |
def perturb(text):
|
| 44 |
words = text.split()
|
|
|
|
| 64 |
try:
|
| 65 |
pert_scores.append(perplexity(p))
|
| 66 |
except:
|
| 67 |
+
continue
|
| 68 |
|
| 69 |
if not pert_scores:
|
| 70 |
return 0
|
| 71 |
|
| 72 |
+
# DetectGPT signal
|
| 73 |
+
return float(np.mean(pert_scores) - base)
|
| 74 |
|
| 75 |
|
| 76 |
# ----------------------------------------------------
|
|
|
|
| 90 |
scores.append(score)
|
| 91 |
|
| 92 |
label = "AI" if score > 0 else "Human"
|
| 93 |
+
|
| 94 |
results.append([s, label, f"{abs(score):.4f}"])
|
| 95 |
|
| 96 |
if label == "AI":
|
| 97 |
+
highlighted.append(
|
| 98 |
+
f"<p style='color:red;font-weight:bold'>{s}</p>"
|
| 99 |
+
)
|
| 100 |
else:
|
| 101 |
+
highlighted.append(
|
| 102 |
+
f"<p style='color:green;font-weight:bold'>{s}</p>"
|
| 103 |
+
)
|
| 104 |
|
| 105 |
+
# Document-level score (Turnitin-like scaling)
|
| 106 |
avg = np.mean(scores)
|
| 107 |
doc_ai = max(0, min(100, (avg + 1) * 50))
|
| 108 |
|
|
|
|
| 116 |
# GRADIO UI
|
| 117 |
# ----------------------------------------------------
|
| 118 |
with gr.Blocks() as demo:
|
| 119 |
+
gr.Markdown("## 🧠 Writenix DetectGPT (distilgpt2 — Ultra Fast Version)")
|
| 120 |
|
| 121 |
text_input = gr.Textbox(
|
| 122 |
label="Enter text",
|