Spaces:

VictorM-Coder
/

AIDetector

Running

App Files Files Community

VictorM-Coder committed on Sep 8, 2025

Commit

965a472

verified ·

1 Parent(s): 57bb1ed

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -20

app.py CHANGED Viewed

@@ -3,28 +3,26 @@ import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import re
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Use one tokenizer across all ensemble models
 tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
-# Load 3 models from Hugging Face (no local .bin required)
 model_names = [
-    "mihalykiss/modernbert_2/Model_groups_3class_seed12",
-    "mihalykiss/modernbert_2/Model_groups_3class_seed22",
-    "mihalykiss/modernbert_2/Model_groups_3class_seed32",  # third ensemble variant
 ]
 models = []
-for name in model_names:
-    m = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
-    m.load_state_dict(torch.hub.load_state_dict_from_url(
-        f"https://huggingface.co/{name}/resolve/main/pytorch_model.bin",
-        map_location=device
-    ))
-    m.to(device).eval()
     models.append(m)
 label_mapping = {
     0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
     6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
@@ -38,17 +36,18 @@ label_mapping = {
     39: 'text-davinci-002', 40: 'text-davinci-003'
 }
 def clean_text(text: str) -> str:
     """Normalize whitespace in *text*.

     Runs of two or more whitespace characters collapse to a single space,
     whitespace directly before any of ``, . ; : ? !`` is removed, and the
     result is stripped of leading and trailing whitespace. A single interior
     whitespace character not followed by that punctuation is kept as is.
     """
     collapsed = re.sub(r"\s{2,}", " ", text)
     tightened = re.sub(r"\s+([,.;:?!])", r"\1", collapsed)
     return tightened.strip()
 def classify_text(text):
     cleaned_text = clean_text(text)
     if not cleaned_text:
         return "Please paste some text."
-    # Split text into sentences for per-sentence highlighting
     sentences = re.split(r'(?<=[.!?])\s+', cleaned_text)
     highlighted = []
@@ -57,6 +56,7 @@ def classify_text(text):
     for sent in sentences:
         if not sent.strip():
             continue
         inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True).to(device)
         with torch.no_grad():
             probs_list = []
@@ -66,6 +66,7 @@ def classify_text(text):
             avg_probs = sum(probs_list) / len(probs_list)
             probs = avg_probs[0]
         ai_probs = probs.clone()
         ai_probs[24] = 0
         ai_score = ai_probs.sum().item() * 100
@@ -74,27 +75,28 @@ def classify_text(text):
         total_ai += ai_score
         total_human += human_score
-        if ai_score > 20:  # highlight AI-like sentences
             highlighted.append(f"<span class='highlight-ai'>{sent}</span>")
         else:
             highlighted.append(f"<span class='highlight-human'>{sent}</span>")
-    # Global decision
     if total_human >= total_ai:
-        verdict = f"<br><br><b>Overall: {total_human/(total_ai+total_human)*100:.2f}% Human</b>"
     else:
-        verdict = f"<br><br><b>Overall: {total_ai/(total_ai+total_human)*100:.2f}% AI</b>"
     return " ".join(highlighted) + verdict
-# Gradio UI
 iface = gr.Interface(
     fn=classify_text,
     inputs=gr.Textbox(lines=6, placeholder="Paste text here..."),
     outputs="html",
     title="AI Text Detector",
-    description="Detects AI-generated text using ModernBERT ensemble and highlights AI-like vs Human-like sentences."
 )
 iface.launch()

 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import re
+# Use GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# One tokenizer shared across models
 tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
+# Ensemble model repos (replace with real Hugging Face repos if names differ)
 model_names = [
+    "mihalykiss/modernbert_2_seed12",
+    "mihalykiss/modernbert_2_seed22",
+    "mihalykiss/modernbert_2_seed32"
 ]
+# Load models directly from Hugging Face
 models = []
+for repo in model_names:
+    m = AutoModelForSequenceClassification.from_pretrained(repo).to(device).eval()
     models.append(m)
+# Label map
 label_mapping = {
     0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
     6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
     39: 'text-davinci-002', 40: 'text-davinci-003'
 }
+# Text cleanup
 def clean_text(text: str) -> str:
     """Normalize whitespace in *text*.

     Runs of two or more whitespace characters collapse to a single space,
     whitespace directly before any of ``, . ; : ? !`` is removed, and the
     result is stripped of leading and trailing whitespace. A single interior
     whitespace character not followed by that punctuation is kept as is.
     """
     collapsed = re.sub(r"\s{2,}", " ", text)
     tightened = re.sub(r"\s+([,.;:?!])", r"\1", collapsed)
     return tightened.strip()
+# Classification function
 def classify_text(text):
     cleaned_text = clean_text(text)
     if not cleaned_text:
         return "Please paste some text."
     sentences = re.split(r'(?<=[.!?])\s+', cleaned_text)
     highlighted = []
     for sent in sentences:
         if not sent.strip():
             continue
         inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True).to(device)
         with torch.no_grad():
             probs_list = []
             avg_probs = sum(probs_list) / len(probs_list)
             probs = avg_probs[0]
+        # Human class = 24, AI = all others
         ai_probs = probs.clone()
         ai_probs[24] = 0
         ai_score = ai_probs.sum().item() * 100
         total_ai += ai_score
         total_human += human_score
+        if ai_score > 20:
             highlighted.append(f"<span class='highlight-ai'>{sent}</span>")
         else:
             highlighted.append(f"<span class='highlight-human'>{sent}</span>")
+    # Global verdict
     if total_human >= total_ai:
+        verdict = f"<br><br><b>Overall: {(total_human/(total_ai+total_human))*100:.2f}% Human</b>"
     else:
+        verdict = f"<br><br><b>Overall: {(total_ai/(total_ai+total_human))*100:.2f}% AI</b>"
     return " ".join(highlighted) + verdict
+# Gradio interface with styling
 iface = gr.Interface(
     fn=classify_text,
     inputs=gr.Textbox(lines=6, placeholder="Paste text here..."),
     outputs="html",
     title="AI Text Detector",
+    description="Detects AI-generated text using a ModernBERT ensemble. Sentences are highlighted:<br>"
+                "<span style='color:#FF5733;font-weight:bold;'>AI-like</span> vs "
+                "<span style='color:#4CAF50;font-weight:bold;'>Human-like</span>."
 )
 iface.launch()