File size: 3,989 Bytes
c1f68a7 7fb6f67 032fc90 9525cff 7fb6f67 93b7207 9525cff 032fc90 3968acc 7fb6f67 3968acc 7fb6f67 7c62b75 3968acc 7fb6f67 3968acc 7fb6f67 3968acc dc7ae74 032fc90 7fb6f67 3968acc 7fb6f67 3968acc 7fb6f67 8bcd35e 7fb6f67 92b0b05 7fb6f67 92b0b05 7fb6f67 92b0b05 93b7207 92b0b05 93b7207 7fb6f67 3968acc 9525cff 3968acc 8bcd35e 3968acc 7fb6f67 8bcd35e 7fb6f67 92b0b05 7fb6f67 92b0b05 7fb6f67 92b0b05 3968acc 8bcd35e 3968acc 92b0b05 8bcd35e 3968acc e952a13 3968acc 032fc90 7fb6f67 8bcd35e 032fc90 dc7ae74 032fc90 b91f5f8 dc7ae74 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import math
import re
import numpy as np
# -------------------------------
# Sentence splitter (no nltk)
# -------------------------------
def simple_sent_tokenize(text):
    """Split *text* into sentences without requiring nltk.

    Splits after terminal punctuation (., !, ?) followed by any
    whitespace. The original pattern only matched literal spaces, so
    sentences separated by newlines or tabs were never split; \\s+
    fixes that while preserving behavior for space-separated input.

    Returns a list of non-empty, stripped sentence strings.
    """
    sentences = re.split(r'(?<=[.!?])\s+', text)
    # Drop empty/whitespace-only fragments and trim residual whitespace.
    return [s.strip() for s in sentences if s.strip()]
# -------------------------------
# Load Pre-trained Model (Template for Fine-tuning)
# -------------------------------
# Base checkpoint for the sentence classifier. NOTE(review): plain
# "roberta-large" has NOT been fine-tuned for AI-text detection — its
# 2-label head is randomly initialized, so scores are meaningless until a
# fine-tuned detector checkpoint is substituted here, as the comment says.
model_name = "roberta-large" # Replace with your fine-tuned AI detector
tokenizer = AutoTokenizer.from_pretrained(model_name)
# num_labels=2 -> binary head: index 0 = human, index 1 = AI (assumed by
# sentence_score, which reads column 1 of the softmax).
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
# -------------------------------
# GPT-2 Perplexity Helper (Optional)
# -------------------------------
# GPT-2 is loaded once at import time and reused by compute_perplexity()
# as an auxiliary fluency signal (low perplexity often correlates with
# machine-generated text).
from transformers import GPT2LMHeadModel, GPT2Tokenizer
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
def compute_perplexity(text):
    """Return GPT-2 perplexity of *text* (exp of the LM loss).

    Input is truncated to 512 tokens; the model scores the text
    against itself (labels == input_ids), so the loss is the average
    negative log-likelihood per token.
    """
    encoded = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    ids = encoded.input_ids
    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = gpt2_model(ids, labels=ids)
    return math.exp(outputs.loss.item())
# -------------------------------
# Sentence-level scoring (Stacked Classifier Template)
# -------------------------------
def sentence_score(sentence):
    """Score one sentence for AI-likeness in [0, 1].

    Stacks two signals:
      1. the transformer classifier's softmax probability for label 1
         (treated as the "AI" class), and
      2. a normalized GPT-2 perplexity feature (low perplexity -> high
         score, clamped to [0, 1]),
    combined with fixed 0.8 / 0.2 weights (tune after fine-tuning).
    """
    # Signal 1: classifier probability for the AI label.
    encoded = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        raw_logits = model(**encoded).logits
    ai_prob = torch.softmax(raw_logits, dim=1).tolist()[0][1]

    # Signal 2: perplexity, mapped so that fluent (low-ppl) text scores high.
    perplexity = compute_perplexity(sentence)
    ppl_feature = max(0, min(1, 100 / perplexity))

    # Weighted stacking of the two signals.
    return ai_prob * 0.8 + ppl_feature * 0.2
# -------------------------------
# Tuned Verdicts
# -------------------------------
def verdict_95(ai_prob):
    """Map an AI probability in [0, 1] to a (verdict text, color) pair.

    Thresholds: <0.3 green, <0.5 yellowgreen, <0.7 orange, else red.
    """
    # Ordered threshold table: first upper bound that exceeds ai_prob wins.
    bands = (
        (0.3, "Most likely human-written.", "green"),
        (0.5, "Possibly human-written with minimal AI assistance.", "yellowgreen"),
        (0.7, "Possibly AI-generated or human using AI assistance.", "orange"),
    )
    for upper, text, color in bands:
        if ai_prob < upper:
            return text, color
    return "Likely AI-generated or heavily AI-assisted.", "red"
# -------------------------------
# Main Analysis Function
# -------------------------------
def analyze_text(user_text):
    """Run sentence-level AI detection over *user_text*.

    Returns a JSON-serializable dict with overall AI/human probabilities
    (percentages), an overall verdict + color, and per-sentence details.
    Returns {"error": ...} when the input contains no sentences.
    """
    sentences = simple_sent_tokenize(user_text)
    if not sentences:
        return {"error": "Please enter some text."}

    sentence_probs = [sentence_score(s) for s in sentences]
    # Cast to a native float: np.mean returns numpy.float64, which would
    # otherwise leak numpy scalar types into the JSON payload and can
    # break serialization in the UI layer.
    final_ai = float(np.mean(sentence_probs))
    final_human = 1 - final_ai
    verdict_text, verdict_color = verdict_95(final_ai)

    # Per-sentence verdicts, colored with the same thresholds as overall.
    sentence_details = []
    for s, p in zip(sentences, sentence_probs):
        s_verdict, s_color = verdict_95(p)
        sentence_details.append({
            "sentence": s,
            "AI Probability": round(p * 100, 2),
            "Verdict": s_verdict,
            "Color": s_color,
        })

    return {
        "Final AI Probability": round(final_ai * 100, 2),
        "Final Human Probability": round(final_human * 100, 2),
        "Verdict": verdict_text,
        "Verdict Color": verdict_color,
        "Sentence-level Details": sentence_details,
    }
# -------------------------------
# Gradio UI
# -------------------------------
# Build the Gradio interface: a text box feeding analyze_text, with
# results rendered as JSON.
with gr.Blocks() as demo:
    gr.Markdown("# 🌐 High-Accuracy Universal AI vs Human Text Detector")
    text_box = gr.Textbox(label="Enter Text", placeholder="Paste text here...", lines=12, type="text")
    detect_btn = gr.Button("Run Detection")
    results_view = gr.JSON(label="Results")
    detect_btn.click(analyze_text, inputs=text_box, outputs=results_view)

if __name__ == "__main__":
    # Bind to all interfaces on the conventional Gradio port.
    demo.launch(server_name="0.0.0.0", server_port=7860)