import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import re
import html
import pandas as pd
import gradio as gr
# -----------------------------
# DETECTOR MODEL
# -----------------------------
MODEL_NAME = "Hello-SimpleAI/HC3-Plus-OpenAI-Detector"
# -----------------------------
# LOAD MODEL
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16 if (device.type == "cuda" and torch.cuda.is_bf16_supported()) else torch.float32
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
model.to(device).eval()
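# The class-index -> label mapping varies between detector checkpoints, so
# confirm which index means "AI" before trusting the scores below
# (this script assumes index 0 = AI; compare against the printed mapping):
print(f"Label mapping: {model.config.id2label}")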
# -----------------------------
# PARAGRAPH TOKENIZER
# -----------------------------
def paragraph_split(text):
    # Split on blank lines so multi-line paragraphs stay together;
    # splitting on every "\n" would score each line separately.
    paragraphs = [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]
    return paragraphs
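# Example: paragraph_split("Intro text.\n\nBody text.") -> ["Intro text.", "Body text."]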
# -----------------------------
# INFERENCE FUNCTION
# -----------------------------
def classify_text(text):
    if not text.strip():
        return "⚠️ Please enter some text.", None, None
    paragraphs = paragraph_split(text)
    if not paragraphs:
        return "⚠️ No paragraphs detected.", None, None
    # Tokenize paragraphs as one padded batch; paragraphs longer than the
    # model's window are truncated, so only their first max_length tokens
    # are judged.
    inputs = tokenizer(
        paragraphs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        # model.config.max_position_embeddings can exceed the usable window
        # (e.g. 514 on RoBERTa-style models), so cap with the tokenizer's limit.
        max_length=min(tokenizer.model_max_length, model.config.max_position_embeddings),
    ).to(device)
    # Predict
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = F.softmax(logits, dim=-1).cpu()
    preds = torch.argmax(probs, dim=-1)
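    # For very long documents it may be worth chunking `paragraphs` into
    # mini-batches here to bound peak memory; a single batch is fine for
    # typical essay-length inputs.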
    # -----------------------------
    # BUILD RESULTS
    # -----------------------------
    results = []
    highlighted_paragraphs = []
    for i, p in enumerate(paragraphs):
        pred_label = preds[i].item()
        confidence = probs[i, pred_label].item()
        # Assumes class 0 = AI; verify against model.config.id2label.
        label = "AI" if pred_label == 0 else "Human"
        conf_text = f"{confidence:.2f}"
        results.append([p, label, conf_text])
        # Highlighting: escape user text so it cannot inject raw HTML.
        safe_p = html.escape(p)
        color = "red" if label == "AI" else "green"
        highlighted_paragraphs.append(
            f"<p style='color:{color}; font-weight:bold; margin-bottom:10px'>{safe_p}</p>"
        )
    # -----------------------------
    # DOCUMENT LEVEL SCORE
    # -----------------------------
    avg = torch.mean(probs, dim=0)
    ai_likelihood = avg[0].item() * 100  # class 0 = AI (see mapping note above)
    highlighted_html = "\n".join(highlighted_paragraphs)
    df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])
    return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df
# -----------------------------
# GRADIO INTERFACE
# -----------------------------
with gr.Blocks() as demo:
gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")
with gr.Row():
text_input = gr.Textbox(
label="Enter text",
lines=14,
placeholder="Paste your essay, article, or content here…"
)
classify_btn = gr.Button("🚀 Detect AI")
ai_score = gr.Label(label="Overall AI Likelihood")
highlighted = gr.HTML()
table = gr.Dataframe(headers=["Paragraph", "Classification", "Confidence"], wrap=True)
classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])
if __name__ == "__main__":
    demo.launch()
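# Quick smoke test without the UI (hypothetical input, run in a Python shell):
#   summary, html_out, df = classify_text("First paragraph.\n\nSecond paragraph.")
#   print(summary)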