Spaces:
Running
Running
File size: 3,674 Bytes
958e345 ceeca7d 57bb1ed 6c56aca ceeca7d 49d2f3f ceeca7d 9267b26 1feb8eb 9267b26 1feb8eb 9267b26 49d2f3f ceeca7d 9267b26 1feb8eb 9267b26 1feb8eb 9267b26 49d2f3f ceeca7d 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 1feb8eb 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 1feb8eb 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 1feb8eb 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# Standard library
import html
import re

# Third-party
import gradio as gr
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# -----------------------------
# STRONGEST AVAILABLE PUBLIC MODEL (WORKING)
# -----------------------------
# Binary sequence classifier; per the detector's label convention used below:
# label 0 = Real (Human), label 1 = Fake (AI).
MODEL_NAME = "openai-community/roberta-base-openai-detector"
# -----------------------------
# LOAD MODEL
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Prefer GPU when available; use bfloat16 only on CUDA devices that support it,
# otherwise fall back to full float32.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
# Inference only: move to the chosen device and disable dropout / training behavior.
model.to(device).eval()
# -----------------------------
# PARAGRAPH SPLITTER
# -----------------------------
def paragraph_split(text):
    """Return the non-empty paragraphs of *text*.

    Each newline-separated line is treated as one paragraph; surrounding
    whitespace is trimmed and blank lines are dropped.
    """
    trimmed = (line.strip() for line in text.split("\n"))
    return [line for line in trimmed if line]
# -----------------------------
# AI DETECTION FUNCTION
# -----------------------------
def classify_text(text):
    """Classify each paragraph of *text* as AI- or human-written.

    Returns a 3-tuple for the Gradio outputs:
      - summary string with the document-level AI likelihood (percent),
      - HTML with each paragraph color-coded (red = AI, green = Human),
      - a pandas DataFrame of [Paragraph, Classification, Confidence] rows.
    On empty input, returns a warning string and (None, None).
    """
    if not text.strip():
        return "⚠️ Please enter some text.", None, None
    paragraphs = paragraph_split(text)
    if not paragraphs:
        return "⚠️ No paragraphs detected.", None, None
    # Tokenize all paragraphs as a single padded batch.
    # BUGFIX: use tokenizer.model_max_length (512) rather than
    # model.config.max_position_embeddings (514 for RoBERTa): RoBERTa reserves
    # two position slots for the padding offset, so 514-token inputs overflow
    # the position-embedding table and crash.
    inputs = tokenizer(
        paragraphs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=tokenizer.model_max_length,
    ).to(device)
    # Predict
    with torch.no_grad():
        logits = model(**inputs).logits
    # Cast to float32 before softmax so bf16 inference keeps full precision;
    # move to CPU once (preds is derived from the already-CPU tensor).
    probs = F.softmax(logits.float(), dim=-1).cpu()
    preds = torch.argmax(probs, dim=-1)
    # -----------------------------
    # BUILD RESULTS
    # -----------------------------
    results = []
    highlighted_paragraphs = []
    for i, p in enumerate(paragraphs):
        pred_label = preds[i].item()
        confidence = probs[i, pred_label].item()
        # NOTE: roberta-base-openai-detector → label 1 = Fake (AI), 0 = Real (Human)
        label = "AI" if pred_label == 1 else "Human"
        results.append([p, label, f"{confidence:.2f}"])
        # BUGFIX: escape user-supplied text before embedding it in HTML so
        # markup or script in the input cannot break or inject into the page.
        color = "red" if label == "AI" else "green"
        highlighted_paragraphs.append(
            f"<p style='color:{color}; font-weight:bold; margin-bottom:10px'>{html.escape(p)}</p>"
        )
    # -----------------------------
    # DOCUMENT LEVEL SCORE
    # -----------------------------
    # Mean AI-class probability across paragraphs, as a percentage.
    avg = torch.mean(probs, dim=0)
    ai_likelihood = avg[1].item() * 100  # class 1 = AI
    highlighted_html = "\n".join(highlighted_paragraphs)
    df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])
    return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df
# -----------------------------
# GRADIO UI
# -----------------------------
# -----------------------------
# GRADIO UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")

    # Input area: a single large textbox for the document to analyze.
    with gr.Row():
        essay_box = gr.Textbox(
            label="Enter text",
            placeholder="Paste your essay, article, or content here…",
            lines=14,
        )

    detect_button = gr.Button("🚀 Detect AI")

    # Output widgets: overall score, color-coded paragraphs, per-paragraph table.
    overall_score = gr.Label(label="Overall AI Likelihood")
    paragraph_view = gr.HTML()
    results_table = gr.Dataframe(
        headers=["Paragraph", "Classification", "Confidence"],
        wrap=True,
    )

    detect_button.click(
        classify_text,
        inputs=essay_box,
        outputs=[overall_score, paragraph_view, results_table],
    )

if __name__ == "__main__":
    demo.launch()
|