|
|
import gradio as gr |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import torch |
|
|
import math |
|
|
import re |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def simple_sent_tokenize(text):
    """Split *text* into sentences on whitespace that follows '.', '!' or '?'.

    Lightweight stand-in for nltk's sent_tokenize; blank fragments are
    dropped from the result.
    """
    pieces = re.split(r'(?<=[.!?]) +', text)
    return list(filter(str.strip, pieces))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sentence-level AI-vs-human classifier.
# NOTE(review): "roberta-large" is a base checkpoint — the 2-way head added by
# num_labels=2 is freshly initialized, so its probabilities are not meaningful
# until fine-tuned; consider swapping in a trained detector checkpoint.
model_name = "roberta-large"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
# The model is only ever used for inference (under torch.no_grad() in
# sentence_score): switch to eval mode so dropout is disabled and the
# emitted probabilities are deterministic.
model.eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# GPT-2 is used solely to score perplexity in compute_perplexity();
# it is never trained here, so put it in eval mode to disable dropout
# and make perplexity values deterministic.
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_model.eval()

gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
|
|
|
|
def compute_perplexity(text):
    """Return GPT-2's perplexity for *text* (lower = more predictable).

    Input is truncated to 512 tokens. Returns ``float('inf')`` when a
    finite perplexity cannot be computed (too-short input, or exp overflow),
    which downstream maps to a perplexity score of 0.
    """
    enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    input_ids = enc.input_ids
    # GPT-2's LM loss shifts labels by one position, so it needs at least
    # two tokens; with fewer the loss is undefined (NaN). Treat such input
    # as maximally surprising instead of propagating NaN.
    if input_ids.shape[1] < 2:
        return float("inf")
    with torch.no_grad():
        loss = gpt2_model(input_ids, labels=input_ids).loss
    try:
        return math.exp(loss.item())
    except OverflowError:
        # exp() of a very large loss exceeds float range.
        return float("inf")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sentence_score(sentence):
    """Return a blended AI-likelihood score in [0, 1] for one sentence.

    Combines the classifier's positive-class probability (weight 0.8) with
    a perplexity-derived signal (weight 0.2); higher means "more likely AI".
    """
    encoded = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**encoded).logits
    # Probability of class index 1 ("AI") from the softmax over the 2 labels.
    transformer_prob = torch.softmax(logits, dim=1)[0, 1].item()

    # Low perplexity (very predictable text) pushes this toward 1.
    ppl = compute_perplexity(sentence)
    ppl_score = min(1, max(0, 100 / ppl))

    return 0.8 * transformer_prob + 0.2 * ppl_score
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def verdict_95(ai_prob):
    """Map an AI probability in [0, 1] to a (verdict message, color) pair."""
    bands = (
        (0.3, "Most likely human-written.", "green"),
        (0.5, "Possibly human-written with minimal AI assistance.", "yellowgreen"),
        (0.7, "Possibly AI-generated or human using AI assistance.", "orange"),
    )
    for upper_bound, message, color in bands:
        if ai_prob < upper_bound:
            return message, color
    # 0.7 and above: strongest AI signal.
    return "Likely AI-generated or heavily AI-assisted.", "red"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze_text(user_text):
    """Score *user_text* for AI authorship.

    Splits the text into sentences, averages the per-sentence AI
    probabilities, and returns a JSON-friendly dict with overall scores,
    an overall verdict, and per-sentence details. Returns an ``error``
    dict when the input contains no sentences.
    """
    sentences = simple_sent_tokenize(user_text)
    if not sentences:
        return {"error": "Please enter some text."}

    sentence_probs = [sentence_score(s) for s in sentences]
    # Cast to a plain float: np.mean returns np.float64, which the stdlib
    # json encoder cannot serialize and which can break the gr.JSON output.
    final_ai = float(np.mean(sentence_probs))
    final_human = 1 - final_ai
    verdict_text, verdict_color = verdict_95(final_ai)

    sentence_details = []
    for s, p in zip(sentences, sentence_probs):
        s_verdict, s_color = verdict_95(p)
        sentence_details.append({
            "sentence": s,
            "AI Probability": round(p * 100, 2),
            "Verdict": s_verdict,
            "Color": s_color,
        })

    return {
        "Final AI Probability": round(final_ai * 100, 2),
        "Final Human Probability": round(final_human * 100, 2),
        "Verdict": verdict_text,
        "Verdict Color": verdict_color,
        "Sentence-level Details": sentence_details,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio front end: a text box, a trigger button, and a JSON results panel.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🌐 High-Accuracy Universal AI vs Human Text Detector")
    text_box = gr.Textbox(
        label="Enter Text",
        placeholder="Paste text here...",
        lines=12,
        type="text",
    )
    detect_btn = gr.Button("Run Detection")
    results_panel = gr.JSON(label="Results")
    detect_btn.click(analyze_text, inputs=text_box, outputs=results_panel)

if __name__ == "__main__":
    # Bind to all interfaces (e.g. for container deployments) on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)