# NOTE(review): the lines "Spaces: / Running / Running" were Hugging Face Spaces
# page-status residue from the copy-paste, not code; preserved here as a comment
# so the file parses.
import re

import gradio as gr
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# -----------------------------
# Model selection — public detector checkpoint used for all classification.
# -----------------------------
MODEL_NAME = "openai-community/roberta-base-openai-detector"

# -----------------------------
# One-time load of tokenizer + classifier (module import time).
# bf16 is used only when a CUDA device supports it; otherwise fp32.
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda" and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float32

model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
model.to(device).eval()
| # ----------------------------- | |
| # PARAGRAPH SPLITTER | |
| # ----------------------------- | |
def paragraph_split(text):
    """Split *text* on newlines and return the stripped, non-empty paragraphs."""
    stripped = (chunk.strip() for chunk in text.split("\n"))
    return [chunk for chunk in stripped if chunk]
| # ----------------------------- | |
| # AI DETECTION FUNCTION | |
| # ----------------------------- | |
def classify_text(text):
    """Classify each paragraph of *text* as AI-written or human-written.

    Returns a 3-tuple:
      - summary string with the document-level AI likelihood (percentage),
      - HTML string with paragraphs color-coded (red = AI, green = Human),
      - pandas DataFrame with columns Paragraph / Classification / Confidence.
    For empty or whitespace-only input, the last two elements are None.
    """
    if not text.strip():
        return "⚠️ Please enter some text.", None, None
    paragraphs = paragraph_split(text)
    if not paragraphs:
        return "⚠️ No paragraphs detected.", None, None

    # FIX: resolve which class index means "AI" from the checkpoint config
    # instead of hard-coding index 1. roberta-base-openai-detector's config maps
    # 0 -> "Fake" (AI) and 1 -> "Real" (Human), so the previous `1 == AI`
    # assumption inverted every label and the document score.
    id2label = getattr(model.config, "id2label", None) or {}
    ai_index = next(
        (idx for idx, name in id2label.items() if str(name).lower() in ("fake", "ai", "machine")),
        0,  # fallback: the detector's documented convention is class 0 = Fake
    )

    # Tokenize all paragraphs as one padded batch.
    # FIX: truncate to the tokenizer's limit, not max_position_embeddings —
    # RoBERTa reports 514 there (it includes the padding-index offset), and
    # feeding 514 tokens overflows the position-embedding table at runtime.
    inputs = tokenizer(
        paragraphs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=tokenizer.model_max_length,
    ).to(device)

    # Forward pass without gradients; move probabilities back to CPU for pandas.
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = F.softmax(logits, dim=-1).cpu()
    preds = torch.argmax(probs, dim=-1).cpu()

    # Per-paragraph rows and color-coded HTML (single branch-free template).
    results = []
    highlighted_paragraphs = []
    for i, p in enumerate(paragraphs):
        pred_label = preds[i].item()
        confidence = probs[i, pred_label].item()
        label = "AI" if pred_label == ai_index else "Human"
        results.append([p, label, f"{confidence:.2f}"])
        color = "red" if label == "AI" else "green"
        highlighted_paragraphs.append(
            f"<p style='color:{color}; font-weight:bold; margin-bottom:10px'>{p}</p>"
        )

    # Document-level score: mean AI-class probability across paragraphs.
    avg = torch.mean(probs, dim=0)
    ai_likelihood = avg[ai_index].item() * 100
    highlighted_html = "\n".join(highlighted_paragraphs)
    df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])
    return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df
| # ----------------------------- | |
| # GRADIO UI | |
| # ----------------------------- | |
# -----------------------------
# Gradio UI: one text box, a trigger button, and three result views
# wired to classify_text (summary label, highlighted HTML, results table).
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")
    with gr.Row():
        essay_box = gr.Textbox(
            label="Enter text",
            lines=14,
            placeholder="Paste your essay, article, or content here…",
        )
    detect_button = gr.Button("🚀 Detect AI")
    overall_score = gr.Label(label="Overall AI Likelihood")
    colored_output = gr.HTML()
    results_table = gr.Dataframe(
        headers=["Paragraph", "Classification", "Confidence"],
        wrap=True,
    )
    detect_button.click(
        classify_text,
        inputs=essay_box,
        outputs=[overall_score, colored_output, results_table],
    )

# Launch only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()