yagnik12 committed on
Commit
8bcd35e
·
verified ·
1 Parent(s): a43517c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -87
app.py CHANGED
@@ -2,41 +2,19 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
3
  import torch
4
  import math
5
- import nltk
6
 
7
- # Download NLTK punkt tokenizer if not present
8
- nltk.download('punkt')
9
- from nltk.tokenize import sent_tokenize
10
-
11
- # -------------------------------
12
- # Load Models
13
- # -------------------------------
14
-
15
- # Example models: use open-source detectors available on Hugging Face
16
  detector_names = [
17
- "Hello-SimpleAI/chatgpt-detector-roberta", # public model
18
- "roberta-large-openai-detector" # public model
19
  ]
 
 
20
 
21
- detector_tokenizers = []
22
- detector_models = []
23
-
24
- for name in detector_names:
25
- try:
26
- detector_tokenizers.append(AutoTokenizer.from_pretrained(name))
27
- detector_models.append(AutoModelForSequenceClassification.from_pretrained(name))
28
- except Exception as e:
29
- print(f"Error loading model {name}: {e}")
30
-
31
- # GPT-2 for perplexity scoring
32
  gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
33
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
34
 
35
-
36
- # -------------------------------
37
  # Helper functions
38
- # -------------------------------
39
-
40
  def compute_perplexity(text: str) -> float:
41
  enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
42
  input_ids = enc.input_ids
@@ -44,6 +22,32 @@ def compute_perplexity(text: str) -> float:
44
  loss = gpt2_model(input_ids, labels=input_ids).loss
45
  return math.exp(loss.item())
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def verdict(ai_prob):
48
  if ai_prob < 20:
49
  return "Most likely human-written."
@@ -56,70 +60,12 @@ def verdict(ai_prob):
56
  else:
57
  return "Likely AI-generated or heavily AI-assisted."
58
 
59
- def analyze_sentence(sentence):
60
- # Detector probabilities
61
- probs = []
62
- for tokenizer, model in zip(detector_tokenizers, detector_models):
63
- try:
64
- inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
65
- with torch.no_grad():
66
- logits = model(**inputs).logits
67
- probs.append(torch.softmax(logits, dim=1).tolist()[0][1]) # AI probability
68
- except Exception as e:
69
- print(f"Error analyzing sentence with model: {e}")
70
-
71
- # GPT-2 perplexity
72
- ppl = compute_perplexity(sentence)
73
- ppl_score = max(0, min(1, 100 / ppl))
74
-
75
- # Aggregate
76
- if probs:
77
- final_ai = sum(probs) / len(probs) * 0.7 + ppl_score * 0.3
78
- else:
79
- final_ai = ppl_score # fallback if detectors fail
80
- return round(final_ai * 100, 2)
81
-
82
- def analyze_text(text):
83
- if not text.strip():
84
- return {"error": "Please enter some text to analyze."}
85
-
86
- sentences = sent_tokenize(text)
87
- sentence_results = []
88
- total_ai = 0
89
-
90
- for sent in sentences:
91
- ai_prob = analyze_sentence(sent)
92
- total_ai += ai_prob
93
- sentence_results.append({"sentence": sent, "AI Probability (%)": ai_prob})
94
-
95
- final_ai_prob = total_ai / len(sentences)
96
- final_human_prob = 100 - final_ai_prob
97
- final_verdict = verdict(final_ai_prob)
98
-
99
- return {
100
- "Sentence-level Analysis": sentence_results,
101
- "Final AI Probability (%)": round(final_ai_prob, 2),
102
- "Final Human Probability (%)": round(final_human_prob, 2),
103
- "Verdict": final_verdict
104
- }
105
-
106
- # -------------------------------
107
  # Gradio UI
108
- # -------------------------------
109
-
110
  with gr.Blocks() as demo:
111
- gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector (Sentence-Level)")
112
-
113
- user_input = gr.Textbox(
114
- label="✍️ Enter Text",
115
- placeholder="Paste text here...",
116
- lines=12,
117
- type="text"
118
- )
119
-
120
  run_btn = gr.Button("Run Detection")
121
  output = gr.JSON(label="Results")
122
-
123
  run_btn.click(analyze_text, inputs=user_input, outputs=output)
124
 
125
  if __name__ == "__main__":
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel
3
  import torch
4
  import math
 
5
 
6
+ # Load models
 
 
 
 
 
 
 
 
7
  detector_names = [
8
+ "Hello-SimpleAI/chatgpt-detector-roberta",
9
+ "roberta-large-openai-detector"
10
  ]
11
+ detector_tokenizers = [AutoTokenizer.from_pretrained(name) for name in detector_names]
12
+ detector_models = [AutoModelForSequenceClassification.from_pretrained(name) for name in detector_names]
13
 
 
 
 
 
 
 
 
 
 
 
 
14
  gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
15
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
16
 
 
 
17
  # Helper functions
 
 
18
  def compute_perplexity(text: str) -> float:
19
  enc = gpt2_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
20
  input_ids = enc.input_ids
 
22
  loss = gpt2_model(input_ids, labels=input_ids).loss
23
  return math.exp(loss.item())
24
 
25
def analyze_text(user_text: str):
    """Estimate how likely ``user_text`` is to be AI-generated.

    Two signals are blended:

    * the mean class-1 probability over every loaded detector model
      (weight 0.7), and
    * a GPT-2 perplexity score, ``100 / perplexity`` clamped to [0, 1]
      (weight 0.3).

    If one signal is unavailable, the other is used on its own instead of
    letting a bogus value leak into the result.

    Args:
        user_text: Raw text from the UI. ``None``, empty, or
            whitespace-only input is rejected.

    Returns:
        A dict with "Final AI Probability" and "Final Human Probability"
        (percentages rounded to 2 decimals) and "Verdict", or a dict with
        a single "error" key when the text cannot be analyzed.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError.
    if not user_text or not user_text.strip():
        return {"error": "Please enter some text to analyze."}

    # Signal 1: every configured detector scores the full text.
    probs = []
    for tokenizer, model in zip(detector_tokenizers, detector_models):
        inputs = tokenizer(user_text, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            logits = model(**inputs).logits
        # NOTE(review): index 1 is treated as the "AI" class for every
        # detector -- confirm against each model's config.id2label.
        probs.append(torch.softmax(logits, dim=1).tolist()[0][1])
    detector_score = sum(probs) / len(probs) if probs else None

    # Signal 2: GPT-2 perplexity, normalized to [0, 1].
    ppl = compute_perplexity(user_text)
    if math.isnan(ppl):
        # A NaN perplexity (presumably when the text is a single GPT-2 token,
        # leaving no next-token target for the loss) must not be clamped:
        # min(1, nan) evaluates to 1, which would report the maximum AI score.
        ppl_score = None
    else:
        ppl_score = max(0, min(1, 100 / ppl))

    # Aggregate, falling back to whichever signal is actually available.
    if detector_score is not None and ppl_score is not None:
        final_ai = detector_score * 0.7 + ppl_score * 0.3
    elif detector_score is not None:
        final_ai = detector_score
    elif ppl_score is not None:
        final_ai = ppl_score
    else:
        return {"error": "Unable to score this text."}
    final_human = 1 - final_ai

    return {
        "Final AI Probability": round(final_ai * 100, 2),
        "Final Human Probability": round(final_human * 100, 2),
        "Verdict": verdict(final_ai * 100)
    }
50
+
51
  def verdict(ai_prob):
52
  if ai_prob < 20:
53
  return "Most likely human-written."
 
60
  else:
61
  return "Likely AI-generated or heavily AI-assisted."
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  # Gradio UI
 
 
64
  with gr.Blocks() as demo:
65
+ gr.Markdown("# 🔍 Enhanced AI vs Human Text Detector")
66
+ user_input = gr.Textbox(label="Enter Text", placeholder="Paste text here...", lines=12, type="text")
 
 
 
 
 
 
 
67
  run_btn = gr.Button("Run Detection")
68
  output = gr.JSON(label="Results")
 
69
  run_btn.click(analyze_text, inputs=user_input, outputs=output)
70
 
71
  if __name__ == "__main__":