yagnik12 committed on
Commit
1c31b93
·
verified ·
1 Parent(s): 7c62b75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -27
app.py CHANGED
@@ -2,19 +2,32 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
3
  import torch
4
  import math
 
5
 
 
 
 
 
6
  # Load models
 
 
 
7
  detector_names = [
8
  "Hello-SimpleAI/chatgpt-detector-roberta",
9
  "roberta-large-openai-detector"
10
  ]
 
11
  detector_tokenizers = [AutoTokenizer.from_pretrained(name) for name in detector_names]
12
  detector_models = [AutoModelForSequenceClassification.from_pretrained(name) for name in detector_names]
13
 
 
14
  gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
15
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
16
 
 
17
  # Helper functions
 
 
18
  def compute_perplexity(text: str) -> float:
19
  enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
20
  input_ids = enc.input_ids
@@ -22,50 +35,78 @@ def compute_perplexity(text: str) -> float:
22
  loss = gpt2_model(input_ids, labels=input_ids).loss
23
  return math.exp(loss.item())
24
 
25
- def analyze_text(user_text: str):
26
- if not user_text.strip():
27
- return {"error": "Please enter some text to analyze."}
 
 
 
 
 
 
 
 
 
28
 
29
- # Model 1: ChatGPT detector
 
30
  probs = []
31
  for tokenizer, model in zip(detector_tokenizers, detector_models):
32
- inputs = tokenizer(user_text, return_tensors="pt", truncation=True, max_length=512)
33
  with torch.no_grad():
34
  logits = model(**inputs).logits
35
  probs.append(torch.softmax(logits, dim=1).tolist()[0][1]) # AI probability
36
 
37
- # Model 2: GPT-2 Perplexity
38
- ppl = compute_perplexity(user_text)
39
- ppl_score = max(0, min(1, 100 / ppl)) # normalized to [0,1]
40
 
41
- # Aggregate result
42
  final_ai = sum(probs) / len(probs) * 0.7 + ppl_score * 0.3
43
- final_human = 1 - final_ai
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  return {
46
- "Final AI Probability": round(final_ai * 100, 2),
47
- "Final Human Probability": round(final_human * 100, 2),
48
- "Verdict": verdict(final_ai * 100)
 
49
  }
50
 
51
- def verdict(ai_prob):
52
- if ai_prob < 20:
53
- return "Most likely human-written."
54
- elif 20 <= ai_prob < 40:
55
- return "Possibly human-written with minimal AI assistance."
56
- elif 40 <= ai_prob < 60:
57
- return "Unclear – could be either human or AI-assisted."
58
- elif 60 <= ai_prob < 80:
59
- return "Possibly AI-generated, or a human using AI assistance."
60
- else:
61
- return "Likely AI-generated or heavily AI-assisted."
62
-
63
  # Gradio UI
 
 
64
  with gr.Blocks() as demo:
65
- gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector")
66
- user_input = gr.Textbox(label="Enter Text", placeholder="Paste text here...", lines=12, type="text")
 
 
 
 
 
 
 
67
  run_btn = gr.Button("Run Detection")
68
  output = gr.JSON(label="Results")
 
69
  run_btn.click(analyze_text, inputs=user_input, outputs=output)
70
 
71
  if __name__ == "__main__":
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
3
  import torch
4
  import math
5
import nltk

# Fetch the Punkt sentence-tokenizer data at startup. Recent NLTK releases
# load the "punkt_tab" resource for sent_tokenize, while older releases load
# the pickled "punkt" models, so both are requested to work on either.
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)
from nltk.tokenize import sent_tokenize
9
+
10
+ # -------------------------------
11
  # Load models
12
+ # -------------------------------
13
+
14
+ # Detector models (placeholders for fine-tuned models)
# Hub identifiers of the sequence-classification detectors in the ensemble.
detector_names = [
    "Hello-SimpleAI/chatgpt-detector-roberta",
    "roberta-large-openai-detector",
]

# One tokenizer and one classifier per detector, in the same order as
# detector_names so the two lists can be zipped together.
detector_tokenizers = list(map(AutoTokenizer.from_pretrained, detector_names))
detector_models = list(
    map(AutoModelForSequenceClassification.from_pretrained, detector_names)
)

# GPT-2 for perplexity scoring
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
26
 
27
+ # -------------------------------
28
  # Helper functions
29
+ # -------------------------------
30
+
31
  def compute_perplexity(text: str) -> float:
32
  enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
33
  input_ids = enc.input_ids
 
35
  loss = gpt2_model(input_ids, labels=input_ids).loss
36
  return math.exp(loss.item())
37
 
38
def verdict(ai_prob):
    """Map an AI probability on the 0-100 scale to a human-readable verdict.

    The scale is split into five bands at 20, 40, 60 and 80. A value equal to
    a boundary belongs to the band above it, and anything that is not below
    80 gets the final "likely AI" verdict.
    """
    # (exclusive upper bound, verdict) pairs, checked in ascending order.
    bands = (
        (20, "Most likely human-written."),
        (40, "Possibly human-written with minimal AI assistance."),
        (60, "Unclear – could be either human or AI-assisted."),
        (80, "Possibly AI-generated, or a human using AI assistance."),
    )
    for upper_bound, label in bands:
        if ai_prob < upper_bound:
            return label
    return "Likely AI-generated or heavily AI-assisted."
50
 
51
def analyze_sentence(sentence):
    """Score a single sentence and return its AI probability as a percentage.

    Every detector in ``detector_models`` classifies the sentence and the
    probabilities are averaged. GPT-2 perplexity from ``compute_perplexity``
    is mapped to a [0, 1] score as ``100 / perplexity`` (clamped). The final
    score is ``0.7 * detector_average + 0.3 * perplexity_score``.

    Args:
        sentence: The text to score.

    Returns:
        The combined AI probability on a 0-100 scale, rounded to two decimals.
    """
    # Detector probabilities
    probs = []
    for tokenizer, model in zip(detector_tokenizers, detector_models):
        inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            logits = model(**inputs).logits
        # NOTE(review): index 1 is assumed to be the "AI" class for every
        # detector; confirm against each model's config.id2label.
        probs.append(torch.softmax(logits, dim=1).tolist()[0][1])
    detector_score = sum(probs) / len(probs)

    # GPT-2 perplexity
    ppl = compute_perplexity(sentence)
    if math.isnan(ppl):
        # min(1, nan) evaluates to 1 in Python, so an undefined perplexity
        # (e.g. a NaN language-model loss on very short input) would be
        # counted as the strongest possible AI evidence. With no usable
        # perplexity signal, rely on the detectors alone.
        return round(detector_score * 100, 2)
    ppl_score = max(0, min(1, 100 / ppl))

    # Aggregate
    final_ai = detector_score * 0.7 + ppl_score * 0.3
    return round(final_ai * 100, 2)  # return in percentage
67
+
68
def analyze_text(text):
    """Run sentence-level AI detection on ``text`` and summarise the result.

    The text is split with ``sent_tokenize``, each sentence is scored by
    ``analyze_sentence``, and the per-sentence percentages are averaged into
    the final AI probability.

    Args:
        text: The text to analyse. ``None``, empty and whitespace-only input
            are all treated as "no input".

    Returns:
        A dict with the per-sentence scores, the final AI and human
        probabilities (percent, two decimals) and a verdict string, or
        ``{"error": ...}`` when there is nothing to analyse.
    """
    no_input = {"error": "Please enter some text to analyze."}

    # `not text` also covers None, which has no .strip().
    if not text or not text.strip():
        return no_input

    sentence_results = [
        {"sentence": sent, "AI Probability (%)": analyze_sentence(sent)}
        for sent in sent_tokenize(text)
    ]
    # Defensive: avoid dividing by zero if the tokenizer yields no sentences.
    if not sentence_results:
        return no_input

    # Final aggregated AI probability
    total_ai = sum(result["AI Probability (%)"] for result in sentence_results)
    final_ai_prob = total_ai / len(sentence_results)
    final_human_prob = 100 - final_ai_prob

    return {
        "Sentence-level Analysis": sentence_results,
        "Final AI Probability (%)": round(final_ai_prob, 2),
        "Final Human Probability (%)": round(final_human_prob, 2),
        "Verdict": verdict(final_ai_prob),
    }
92
 
93
+ # -------------------------------
 
 
 
 
 
 
 
 
 
 
 
94
  # Gradio UI
95
+ # -------------------------------
96
+
# Page layout: heading, input box, trigger button and a JSON result panel.
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector (Sentence-Level)")

    # Multi-line text box for the text to analyse.
    user_input = gr.Textbox(label="✍️ Enter Text", placeholder="Paste text here...", lines=12, type="text")

    run_btn = gr.Button("Run Detection")
    # Displays the dict returned by analyze_text.
    output = gr.JSON(label="Results")

    # Clicking the button sends the text box contents to analyze_text.
    run_btn.click(fn=analyze_text, inputs=user_input, outputs=output)
111
 
112
  if __name__ == "__main__":