yagnik12 committed on
Commit
3968acc
·
verified ·
1 Parent(s): 8bcd35e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -31
app.py CHANGED
@@ -2,67 +2,79 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
3
  import torch
4
  import math
 
 
 
5
 
6
- # Load models
 
 
7
  detector_names = [
8
  "Hello-SimpleAI/chatgpt-detector-roberta",
9
  "roberta-large-openai-detector"
10
  ]
 
11
  detector_tokenizers = [AutoTokenizer.from_pretrained(name) for name in detector_names]
12
  detector_models = [AutoModelForSequenceClassification.from_pretrained(name) for name in detector_names]
13
 
14
  gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
15
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
16
 
17
- # Helper functions
18
- def compute_perplexity(text: str) -> float:
 
 
19
  enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
20
  input_ids = enc.input_ids
21
  with torch.no_grad():
22
  loss = gpt2_model(input_ids, labels=input_ids).loss
23
  return math.exp(loss.item())
24
 
25
- def analyze_text(user_text: str):
26
- if not user_text.strip():
27
- return {"error": "Please enter some text to analyze."}
28
-
29
- # Model 1: ChatGPT detector
30
  probs = []
31
  for tokenizer, model in zip(detector_tokenizers, detector_models):
32
- inputs = tokenizer(user_text, return_tensors="pt", truncation=True, max_length=512)
33
  with torch.no_grad():
34
  logits = model(**inputs).logits
35
- probs.append(torch.softmax(logits, dim=1).tolist()[0][1]) # AI probability
 
 
 
 
36
 
37
- # Model 2: GPT-2 Perplexity
38
- ppl = compute_perplexity(user_text)
39
- ppl_score = max(0, min(1, 100 / ppl)) # normalized to [0,1]
 
40
 
41
- # Aggregate result
42
- final_ai = sum(probs) / len(probs) * 0.7 + ppl_score * 0.3
43
  final_human = 1 - final_ai
44
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  return {
46
- "Final AI Probability": round(final_ai * 100, 2),
47
- "Final Human Probability": round(final_human * 100, 2),
48
- "Verdict": verdict(final_ai * 100)
 
49
  }
50
 
51
- def verdict(ai_prob):
52
- if ai_prob < 20:
53
- return "Most likely human-written."
54
- elif 20 <= ai_prob < 40:
55
- return "Possibly human-written with minimal AI assistance."
56
- elif 40 <= ai_prob < 60:
57
- return "Unclear – could be either human or AI-assisted."
58
- elif 60 <= ai_prob < 80:
59
- return "Possibly AI-generated, or a human using AI assistance."
60
- else:
61
- return "Likely AI-generated or heavily AI-assisted."
62
-
63
  # Gradio UI
 
64
  with gr.Blocks() as demo:
65
- gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector")
66
  user_input = gr.Textbox(label="Enter Text", placeholder="Paste text here...", lines=12, type="text")
67
  run_btn = gr.Button("Run Detection")
68
  output = gr.JSON(label="Results")
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
3
  import torch
4
  import math
5
+ import nltk
6
+ nltk.download('punkt')
7
+ from nltk.tokenize import sent_tokenize
8
 
9
+ # -------------------------------
10
+ # Load Models
11
+ # -------------------------------
12
  detector_names = [
13
  "Hello-SimpleAI/chatgpt-detector-roberta",
14
  "roberta-large-openai-detector"
15
  ]
16
+
17
  detector_tokenizers = [AutoTokenizer.from_pretrained(name) for name in detector_names]
18
  detector_models = [AutoModelForSequenceClassification.from_pretrained(name) for name in detector_names]
19
 
20
  gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
21
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
22
 
23
+ # -------------------------------
24
+ # Helper Functions
25
+ # -------------------------------
26
+ def compute_perplexity(text):
27
  enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
28
  input_ids = enc.input_ids
29
  with torch.no_grad():
30
  loss = gpt2_model(input_ids, labels=input_ids).loss
31
  return math.exp(loss.item())
32
 
33
+ def sentence_score(sentence):
 
 
 
 
34
  probs = []
35
  for tokenizer, model in zip(detector_tokenizers, detector_models):
36
+ inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
37
  with torch.no_grad():
38
  logits = model(**inputs).logits
39
+ probs.append(torch.softmax(logits, dim=1).tolist()[0][1])
40
+ ppl = compute_perplexity(sentence)
41
+ ppl_score = max(0, min(1, 100/ppl))
42
+ # Weighted average: 70% model ensemble, 30% perplexity
43
+ return sum(probs)/len(probs)*0.7 + ppl_score*0.3
44
 
45
+ def analyze_text(user_text):
46
+ sentences = sent_tokenize(user_text)
47
+ if not sentences:
48
+ return {"error": "Please enter some text."}
49
 
50
+ sentence_probs = [sentence_score(s) for s in sentences]
51
+ final_ai = sum(sentence_probs)/len(sentence_probs)
52
  final_human = 1 - final_ai
53
 
54
+ # Verdict
55
+ if final_ai < 0.2:
56
+ verdict_text = "Most likely human-written."
57
+ elif final_ai < 0.4:
58
+ verdict_text = "Possibly human-written with minimal AI assistance."
59
+ elif final_ai < 0.6:
60
+ verdict_text = "Unclear – could be human or AI-assisted."
61
+ elif final_ai < 0.8:
62
+ verdict_text = "Possibly AI-generated or human using AI assistance."
63
+ else:
64
+ verdict_text = "Likely AI-generated or heavily AI-assisted."
65
+
66
  return {
67
+ "Final AI Probability": round(final_ai*100,2),
68
+ "Final Human Probability": round(final_human*100,2),
69
+ "Verdict": verdict_text,
70
+ "Sentence-level AI probabilities": [round(p*100,2) for p in sentence_probs]
71
  }
72
 
73
+ # -------------------------------
 
 
 
 
 
 
 
 
 
 
 
74
  # Gradio UI
75
+ # -------------------------------
76
  with gr.Blocks() as demo:
77
+ gr.Markdown("# 🌐 Universal AI vs Human Text Detector")
78
  user_input = gr.Textbox(label="Enter Text", placeholder="Paste text here...", lines=12, type="text")
79
  run_btn = gr.Button("Run Detection")
80
  output = gr.JSON(label="Results")