ShivamVN committed on
Commit
cf78d1f
·
verified ·
1 Parent(s): 2b3a3a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -32
app.py CHANGED
@@ -3,32 +3,29 @@ import torch
3
  import torch.nn.functional as F
4
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2TokenizerFast
5
  from nltk.tokenize import sent_tokenize
6
- # --- SETUP ---
7
  import nltk
8
- nltk.download('punkt')
9
- nltk.download('punkt_tab') # <--- ADD THIS LINE
10
- print("Initializing App...")
11
  # --- CONFIGURATION ---
12
- # Your Fine-Tuned Model
13
  MODEL_NAME = "ShivamVN/My-Ai-Text-Detector"
14
 
15
  # --- SETUP ---
 
16
  nltk.download('punkt')
 
17
  print("Initializing App...")
18
 
19
  # Detect Hardware
20
  device = "cuda" if torch.cuda.is_available() else "cpu"
21
 
22
- # 1. Load RoBERTa (For Both Templates)
23
  print(f"Loading {MODEL_NAME}...")
24
  try:
25
  clf_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
26
  clf_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
27
  except Exception as e:
28
  print(f"Error loading RoBERTa: {e}")
29
- print("Ensure your Model Repo is PUBLIC in Settings!")
30
 
31
- # 2. Load GPT-2 (Only for Template 2)
32
  print("Loading GPT-2...")
33
  try:
34
  ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
@@ -39,16 +36,13 @@ except Exception as e:
39
  # --- CORE FUNCTIONS ---
40
 
41
  def get_roberta_prob(text):
42
- """Returns scalar probability of AI (0.0 to 1.0)"""
43
  if not text.strip(): return 0.0
44
  inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
45
  with torch.no_grad():
46
  outputs = clf_model(**inputs)
47
- # Label 1 is AI
48
  return F.softmax(outputs.logits, dim=-1).cpu().numpy()[0][1]
49
 
50
  def get_perplexity(text):
51
- """Returns scalar Perplexity score"""
52
  if not text.strip(): return 0.0
53
  encodings = ppl_tokenizer(text, return_tensors="pt")
54
  input_ids = encodings.input_ids.to(device)
@@ -62,11 +56,9 @@ def get_perplexity(text):
62
  def template_model_only(text):
63
  if not text: return "Please enter text."
64
 
65
- # Just run RoBERTa on the full text
66
  ai_prob = get_roberta_prob(text)
67
  percent = ai_prob * 100
68
 
69
- # Simple formatting
70
  label = "AI-GENERATED" if ai_prob > 0.5 else "HUMAN-WRITTEN"
71
  emoji = "🔴" if ai_prob > 0.5 else "🟢"
72
 
@@ -81,7 +73,7 @@ def template_full_system(text):
81
  sentences = sent_tokenize(text)
82
  if not sentences: return "No text detected."
83
 
84
- # 1. SLIDING WINDOW (RoBERTa)
85
  window_size = 2
86
  sentence_raw_scores = {i: [] for i in range(len(sentences))}
87
 
@@ -91,7 +83,7 @@ def template_full_system(text):
91
  for j in range(window_size):
92
  sentence_raw_scores[i+j].append(prob)
93
 
94
- # 2. HYBRID LOGIC (Per Sentence)
95
  log_output = f"{'SENTENCE':<60} | {'SCORE':<5} | {'PPL':<4} | {'VERDICT'}\n"
96
  log_output += "-" * 95 + "\n"
97
 
@@ -102,9 +94,12 @@ def template_full_system(text):
102
  scores = sentence_raw_scores[i]
103
  if not scores: scores = [0.0]
104
 
105
- # RoBERTa Status
106
  min_s = min(scores)
107
  max_s = max(scores)
 
 
 
108
  status = "UNCERTAIN"
109
  if min_s > 0.80: status = "AI"
110
  elif max_s < 0.20: status = "HUMAN"
@@ -115,15 +110,22 @@ def template_full_system(text):
115
  # Final Decision Logic
116
  final = "HUMAN"
117
  if status == "UNCERTAIN":
118
- if ppl < 40: final = "AI" # Low PPL confirms AI
119
  elif status == "AI":
120
- if ppl < 100: final = "AI" # Sanity check
121
 
122
  if final == "AI": total_ai += 1
123
 
124
- # Table Row formatting
 
 
 
 
 
 
 
125
  disp_sent = (sent[:57] + "..") if len(sent) > 57 else sent.ljust(59)
126
- score_val = f"{max(scores)*100:.0f}%"
127
  ppl_val = f"{int(ppl)}"
128
  log_output += f"{disp_sent} | {score_val:<5} | {ppl_val:<4} | {final}\n"
129
 
@@ -135,36 +137,25 @@ def template_full_system(text):
135
  return f"# {verdict}\n**AI Sentence Count:** {ai_percent:.1f}%\n\n```text\n{log_output}\n```"
136
 
137
  # ==========================================
138
- # USER INTERFACE (Gradio)
139
  # ==========================================
140
- # FIXED: Removed theme argument to prevent errors
141
  with gr.Blocks() as demo:
142
  gr.Markdown("# 🕵️‍♂️ AI Text Detector Suite")
143
  gr.Markdown(f"Current Model: `{MODEL_NAME}`")
144
 
145
  with gr.Tabs():
146
-
147
- # --- TAB 1: MODEL ONLY ---
148
  with gr.TabItem("Template 1: Only Model"):
149
  gr.Markdown("### ⚡ Fast Check")
150
- gr.Markdown("Uses **only RoBERTa** to scan the text as a single block. Good for quick, rough estimates.")
151
-
152
  t1_input = gr.Textbox(lines=5, placeholder="Paste text here...", label="Input Text")
153
  t1_button = gr.Button("Analyze (Model Only)", variant="primary")
154
  t1_output = gr.Markdown(label="Result")
155
-
156
  t1_button.click(template_model_only, inputs=t1_input, outputs=t1_output)
157
 
158
- # --- TAB 2: FULL SYSTEM ---
159
  with gr.TabItem("Template 2: Full System"):
160
  gr.Markdown("### 🧠 Deep Analysis")
161
- gr.Markdown("Uses **RoBERTa + GPT-2 + Logic**. Breaks text into sentences, checks context, and analyzes randomness.")
162
-
163
  t2_input = gr.Textbox(lines=8, placeholder="Paste text here...", label="Input Text")
164
  t2_button = gr.Button("Analyze (Full System)", variant="primary")
165
  t2_output = gr.Markdown(label="Detailed Report")
166
-
167
  t2_button.click(template_full_system, inputs=t2_input, outputs=t2_output)
168
 
169
- # Launch
170
  demo.launch()
 
3
  import torch.nn.functional as F
4
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2TokenizerFast
5
  from nltk.tokenize import sent_tokenize
 
6
  import nltk
7
+
 
 
8
  # --- CONFIGURATION ---
 
9
  MODEL_NAME = "ShivamVN/My-Ai-Text-Detector"
10
 
11
  # --- SETUP ---
12
+ # Fix for the nltk error
13
  nltk.download('punkt')
14
+ nltk.download('punkt_tab')
15
  print("Initializing App...")
16
 
17
  # Detect Hardware
18
  device = "cuda" if torch.cuda.is_available() else "cpu"
19
 
20
+ # 1. Load RoBERTa
21
  print(f"Loading {MODEL_NAME}...")
22
  try:
23
  clf_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
24
  clf_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
25
  except Exception as e:
26
  print(f"Error loading RoBERTa: {e}")
 
27
 
28
+ # 2. Load GPT-2
29
  print("Loading GPT-2...")
30
  try:
31
  ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
 
36
  # --- CORE FUNCTIONS ---
37
 
38
  def get_roberta_prob(text):
 
39
  if not text.strip(): return 0.0
40
  inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
41
  with torch.no_grad():
42
  outputs = clf_model(**inputs)
 
43
  return F.softmax(outputs.logits, dim=-1).cpu().numpy()[0][1]
44
 
45
  def get_perplexity(text):
 
46
  if not text.strip(): return 0.0
47
  encodings = ppl_tokenizer(text, return_tensors="pt")
48
  input_ids = encodings.input_ids.to(device)
 
56
  def template_model_only(text):
57
  if not text: return "Please enter text."
58
 
 
59
  ai_prob = get_roberta_prob(text)
60
  percent = ai_prob * 100
61
 
 
62
  label = "AI-GENERATED" if ai_prob > 0.5 else "HUMAN-WRITTEN"
63
  emoji = "🔴" if ai_prob > 0.5 else "🟢"
64
 
 
73
  sentences = sent_tokenize(text)
74
  if not sentences: return "No text detected."
75
 
76
+ # 1. SLIDING WINDOW
77
  window_size = 2
78
  sentence_raw_scores = {i: [] for i in range(len(sentences))}
79
 
 
83
  for j in range(window_size):
84
  sentence_raw_scores[i+j].append(prob)
85
 
86
+ # 2. HYBRID LOGIC
87
  log_output = f"{'SENTENCE':<60} | {'SCORE':<5} | {'PPL':<4} | {'VERDICT'}\n"
88
  log_output += "-" * 95 + "\n"
89
 
 
94
  scores = sentence_raw_scores[i]
95
  if not scores: scores = [0.0]
96
 
97
+ # Calculate Stats
98
  min_s = min(scores)
99
  max_s = max(scores)
100
+ avg_s = sum(scores) / len(scores) # <--- NEW: Calculate Average
101
+
102
+ # Determine Status
103
  status = "UNCERTAIN"
104
  if min_s > 0.80: status = "AI"
105
  elif max_s < 0.20: status = "HUMAN"
 
110
  # Final Decision Logic
111
  final = "HUMAN"
112
  if status == "UNCERTAIN":
113
+ if ppl < 40: final = "AI"
114
  elif status == "AI":
115
+ if ppl < 100: final = "AI"
116
 
117
  if final == "AI": total_ai += 1
118
 
119
+ # --- DISPLAY LOGIC FIX ---
120
+ # If Uncertain, show the Average (e.g., 50%) instead of Max (e.g., 99%)
121
+ if status == "UNCERTAIN":
122
+ display_score = avg_s
123
+ else:
124
+ display_score = max_s
125
+
126
+ # Formatting
127
  disp_sent = (sent[:57] + "..") if len(sent) > 57 else sent.ljust(59)
128
+ score_val = f"{display_score*100:.0f}%"
129
  ppl_val = f"{int(ppl)}"
130
  log_output += f"{disp_sent} | {score_val:<5} | {ppl_val:<4} | {final}\n"
131
 
 
137
  return f"# {verdict}\n**AI Sentence Count:** {ai_percent:.1f}%\n\n```text\n{log_output}\n```"
138
 
139
  # ==========================================
140
+ # USER INTERFACE
141
  # ==========================================
 
142
  with gr.Blocks() as demo:
143
  gr.Markdown("# 🕵️‍♂️ AI Text Detector Suite")
144
  gr.Markdown(f"Current Model: `{MODEL_NAME}`")
145
 
146
  with gr.Tabs():
 
 
147
  with gr.TabItem("Template 1: Only Model"):
148
  gr.Markdown("### ⚡ Fast Check")
 
 
149
  t1_input = gr.Textbox(lines=5, placeholder="Paste text here...", label="Input Text")
150
  t1_button = gr.Button("Analyze (Model Only)", variant="primary")
151
  t1_output = gr.Markdown(label="Result")
 
152
  t1_button.click(template_model_only, inputs=t1_input, outputs=t1_output)
153
 
 
154
  with gr.TabItem("Template 2: Full System"):
155
  gr.Markdown("### 🧠 Deep Analysis")
 
 
156
  t2_input = gr.Textbox(lines=8, placeholder="Paste text here...", label="Input Text")
157
  t2_button = gr.Button("Analyze (Full System)", variant="primary")
158
  t2_output = gr.Markdown(label="Detailed Report")
 
159
  t2_button.click(template_full_system, inputs=t2_input, outputs=t2_output)
160
 
 
161
  demo.launch()