VictorM-Coder committed on
Commit
965a472
·
verified ·
1 Parent(s): 57bb1ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -20
app.py CHANGED
@@ -3,28 +3,26 @@ import torch
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import re
5
 
 
6
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7
 
8
- # Use one tokenizer across all ensemble models
9
  tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
10
 
11
- # Load 3 models from Hugging Face (no local .bin required)
12
  model_names = [
13
- "mihalykiss/modernbert_2/Model_groups_3class_seed12",
14
- "mihalykiss/modernbert_2/Model_groups_3class_seed22",
15
- "mihalykiss/modernbert_2/Model_groups_3class_seed32", # third ensemble variant
16
  ]
17
 
 
18
  models = []
19
- for name in model_names:
20
- m = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
21
- m.load_state_dict(torch.hub.load_state_dict_from_url(
22
- f"https://huggingface.co/{name}/resolve/main/pytorch_model.bin",
23
- map_location=device
24
- ))
25
- m.to(device).eval()
26
  models.append(m)
27
 
 
28
  label_mapping = {
29
  0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
30
  6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
@@ -38,17 +36,18 @@ label_mapping = {
38
  39: 'text-davinci-002', 40: 'text-davinci-003'
39
  }
40
 
 
41
  def clean_text(text: str) -> str:
42
  text = re.sub(r"\s{2,}", " ", text)
43
  text = re.sub(r"\s+([,.;:?!])", r"\1", text)
44
  return text.strip()
45
 
 
46
  def classify_text(text):
47
  cleaned_text = clean_text(text)
48
  if not cleaned_text:
49
  return "Please paste some text."
50
 
51
- # Split text into sentences for per-sentence highlighting
52
  sentences = re.split(r'(?<=[.!?])\s+', cleaned_text)
53
 
54
  highlighted = []
@@ -57,6 +56,7 @@ def classify_text(text):
57
  for sent in sentences:
58
  if not sent.strip():
59
  continue
 
60
  inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True).to(device)
61
  with torch.no_grad():
62
  probs_list = []
@@ -66,6 +66,7 @@ def classify_text(text):
66
  avg_probs = sum(probs_list) / len(probs_list)
67
  probs = avg_probs[0]
68
 
 
69
  ai_probs = probs.clone()
70
  ai_probs[24] = 0
71
  ai_score = ai_probs.sum().item() * 100
@@ -74,27 +75,28 @@ def classify_text(text):
74
  total_ai += ai_score
75
  total_human += human_score
76
 
77
- if ai_score > 20: # highlight AI-like sentences
78
  highlighted.append(f"<span class='highlight-ai'>{sent}</span>")
79
  else:
80
  highlighted.append(f"<span class='highlight-human'>{sent}</span>")
81
 
82
- # Global decision
83
  if total_human >= total_ai:
84
- verdict = f"<br><br><b>Overall: {total_human/(total_ai+total_human)*100:.2f}% Human</b>"
85
  else:
86
- verdict = f"<br><br><b>Overall: {total_ai/(total_ai+total_human)*100:.2f}% AI</b>"
87
 
88
  return " ".join(highlighted) + verdict
89
 
90
-
91
- # Gradio UI
92
  iface = gr.Interface(
93
  fn=classify_text,
94
  inputs=gr.Textbox(lines=6, placeholder="Paste text here..."),
95
  outputs="html",
96
  title="AI Text Detector",
97
- description="Detects AI-generated text using ModernBERT ensemble and highlights AI-like vs Human-like sentences."
 
 
98
  )
99
 
100
  iface.launch()
 
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import re
5
 
6
+ # Use GPU if available
7
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
 
9
+ # One tokenizer shared across models
10
  tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
11
 
12
+ # Ensemble model repos (replace with real Hugging Face repos if names differ)
13
  model_names = [
14
+ "mihalykiss/modernbert_2_seed12",
15
+ "mihalykiss/modernbert_2_seed22",
16
+ "mihalykiss/modernbert_2_seed32"
17
  ]
18
 
19
+ # Load models directly from Hugging Face
20
  models = []
21
+ for repo in model_names:
22
+ m = AutoModelForSequenceClassification.from_pretrained(repo).to(device).eval()
 
 
 
 
 
23
  models.append(m)
24
 
25
+ # Label map
26
  label_mapping = {
27
  0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
28
  6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
 
36
  39: 'text-davinci-002', 40: 'text-davinci-003'
37
  }
38
 
39
+ # Text cleanup
40
  def clean_text(text: str) -> str:
41
  text = re.sub(r"\s{2,}", " ", text)
42
  text = re.sub(r"\s+([,.;:?!])", r"\1", text)
43
  return text.strip()
44
 
45
+ # Classification function
46
  def classify_text(text):
47
  cleaned_text = clean_text(text)
48
  if not cleaned_text:
49
  return "Please paste some text."
50
 
 
51
  sentences = re.split(r'(?<=[.!?])\s+', cleaned_text)
52
 
53
  highlighted = []
 
56
  for sent in sentences:
57
  if not sent.strip():
58
  continue
59
+
60
  inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True).to(device)
61
  with torch.no_grad():
62
  probs_list = []
 
66
  avg_probs = sum(probs_list) / len(probs_list)
67
  probs = avg_probs[0]
68
 
69
+ # Human class = 24, AI = all others
70
  ai_probs = probs.clone()
71
  ai_probs[24] = 0
72
  ai_score = ai_probs.sum().item() * 100
 
75
  total_ai += ai_score
76
  total_human += human_score
77
 
78
+ if ai_score > 20:
79
  highlighted.append(f"<span class='highlight-ai'>{sent}</span>")
80
  else:
81
  highlighted.append(f"<span class='highlight-human'>{sent}</span>")
82
 
83
+ # Global verdict
84
  if total_human >= total_ai:
85
+ verdict = f"<br><br><b>Overall: {(total_human/(total_ai+total_human))*100:.2f}% Human</b>"
86
  else:
87
+ verdict = f"<br><br><b>Overall: {(total_ai/(total_ai+total_human))*100:.2f}% AI</b>"
88
 
89
  return " ".join(highlighted) + verdict
90
 
91
+ # Gradio interface with styling
 
92
  iface = gr.Interface(
93
  fn=classify_text,
94
  inputs=gr.Textbox(lines=6, placeholder="Paste text here..."),
95
  outputs="html",
96
  title="AI Text Detector",
97
+ description="Detects AI-generated text using a ModernBERT ensemble. Sentences are highlighted:<br>"
98
+ "<span style='color:#FF5733;font-weight:bold;'>AI-like</span> vs "
99
+ "<span style='color:#4CAF50;font-weight:bold;'>Human-like</span>."
100
  )
101
 
102
  iface.launch()