Spaces:

VictorM-Coder
/

AIDetector

Running

App Files Files Community

VictorM-Coder committed on Sep 8, 2025

Commit

57bb1ed

verified ·

1 Parent(s): 6f9b15a

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -78

app.py CHANGED Viewed

@@ -1,89 +1,100 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import re
-# Load model
-MODEL = "Hello-SimpleAI/HC3"
-tokenizer = AutoTokenizer.from_pretrained(MODEL)
-model = AutoModelForSequenceClassification.from_pretrained(MODEL)
-def split_sentences(paragraph):
-    """Split a paragraph into sentences."""
-    return re.split(r'(?<=[.!?]) +', paragraph.strip())
-def group_sentences(sentences, size=2):
-    """Group sentences into chunks of 2 (or remaining)."""
-    return [" ".join(sentences[i:i+size]) for i in range(0, len(sentences), size)]
-def detect_ai(text):
-    paragraphs = re.split(r"\n\s*\n", text.strip())
-    results = []
-    all_ai_flags = []  # store 1 = AI, 0 = Human
-    highlighted = ""
-    for para in paragraphs:
-        if not para.strip():
             continue
-        sentences = split_sentences(para)
-        chunks = group_sentences(sentences, size=2)
-        highlighted_para = ""
-        for chunk in chunks:
-            if not chunk.strip():
-                continue
-            inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
-            with torch.no_grad():
-                outputs = model(**inputs)
-                probs = torch.softmax(outputs.logits, dim=1)
-            ai_score = float(probs[0][1])   # AI likelihood
-            human_score = 1 - ai_score      # Human likelihood
-            # Threshold check (AI > 20% → AI)
-            if ai_score > 0.2:
-                label = "🔴 AI"
-                color = "rgb(255,120,120)"  # red
-                all_ai_flags.append(1)
-            else:
-                label = "🟢 Human"
-                color = "rgb(120,255,120)"  # green
-                all_ai_flags.append(0)
-            highlighted_para += (
-                f"<div style='background-color:{color}; padding:4px; margin-bottom:4px; border-radius:4px'>"
-                f"<b>{label}</b> — Human {round(human_score*100,1)}% | AI {round(ai_score*100,1)}%<br>"
-                f"{chunk}</div>"
-            )
-        highlighted += f"<div style='margin-bottom:12px'>{highlighted_para}</div>"
-    # Compute overall result
-    if all_ai_flags:
-        ai_ratio = sum(all_ai_flags) / len(all_ai_flags)
-        if ai_ratio == 1:
-            overall = "🔴 Overall: 100% AI"
-        elif ai_ratio == 0:
-            overall = "🟢 Overall: 100% Human"
         else:
-            overall = f"⚖️ Overall AI Probability: {round(ai_ratio*100,2)}%"
-        highlighted += f"<p><b>{overall}</b></p>"
     else:
-        overall = "No text detected"
-    return highlighted, {"overall": overall, "chunks_checked": len(all_ai_flags)}
-with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 AI Detector (2-sentence chunks)")
-    gr.Markdown("Groups of 2 sentences are checked. If AI >20%, the group is flagged as AI.")
-    input_text = gr.Textbox(lines=12, placeholder="Paste your essay or report here...")
-    output_html = gr.HTML()
-    output_json = gr.JSON()
-    run_btn = gr.Button("Detect AI")
-    run_btn.click(detect_ai, inputs=input_text, outputs=[output_html, output_json])
-demo.launch()

 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import re
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Use one tokenizer across all ensemble models
+tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
+# Load 3 models from Hugging Face (no local .bin required)
+model_names = [
+    "mihalykiss/modernbert_2/Model_groups_3class_seed12",
+    "mihalykiss/modernbert_2/Model_groups_3class_seed22",
+    "mihalykiss/modernbert_2/Model_groups_3class_seed32",  # third ensemble variant
+]
+models = []
+for name in model_names:
+    m = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
+    m.load_state_dict(torch.hub.load_state_dict_from_url(
+        f"https://huggingface.co/{name}/resolve/main/pytorch_model.bin",
+        map_location=device
+    ))
+    m.to(device).eval()
+    models.append(m)
+label_mapping = {
+    0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
+    6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
+    11: 'flan_t5_base', 12: 'flan_t5_large', 13: 'flan_t5_small',
+    14: 'flan_t5_xl', 15: 'flan_t5_xxl', 16: 'gemma-7b-it', 17: 'gemma2-9b-it',
+    18: 'gpt-3.5-turbo', 19: 'gpt-35', 20: 'gpt4', 21: 'gpt4o',
+    22: 'gpt_j', 23: 'gpt_neox', 24: 'human', 25: 'llama3-70b', 26: 'llama3-8b',
+    27: 'mixtral-8x7b', 28: 'opt_1.3b', 29: 'opt_125m', 30: 'opt_13b',
+    31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m', 34: 'opt_6.7b',
+    35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b', 38: 't0_3b',
+    39: 'text-davinci-002', 40: 'text-davinci-003'
+}
+def clean_text(text: str) -> str:
+    text = re.sub(r"\s{2,}", " ", text)
+    text = re.sub(r"\s+([,.;:?!])", r"\1", text)
+    return text.strip()
+def classify_text(text):
+    cleaned_text = clean_text(text)
+    if not cleaned_text:
+        return "Please paste some text."
+    # Split text into sentences for per-sentence highlighting
+    sentences = re.split(r'(?<=[.!?])\s+', cleaned_text)
+    highlighted = []
+    total_ai, total_human = 0, 0
+    for sent in sentences:
+        if not sent.strip():
             continue
+        inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True).to(device)
+        with torch.no_grad():
+            probs_list = []
+            for m in models:
+                logits = m(**inputs).logits
+                probs_list.append(torch.softmax(logits, dim=1))
+            avg_probs = sum(probs_list) / len(probs_list)
+            probs = avg_probs[0]
+        ai_probs = probs.clone()
+        ai_probs[24] = 0
+        ai_score = ai_probs.sum().item() * 100
+        human_score = 100 - ai_score
+        total_ai += ai_score
+        total_human += human_score
+        if ai_score > 20:  # highlight AI-like sentences
+            highlighted.append(f"<span class='highlight-ai'>{sent}</span>")
         else:
+            highlighted.append(f"<span class='highlight-human'>{sent}</span>")
+    # Global decision
+    if total_human >= total_ai:
+        verdict = f"<br><br><b>Overall: {total_human/(total_ai+total_human)*100:.2f}% Human</b>"
     else:
+        verdict = f"<br><br><b>Overall: {total_ai/(total_ai+total_human)*100:.2f}% AI</b>"
+    return " ".join(highlighted) + verdict
+# Gradio UI
+iface = gr.Interface(
+    fn=classify_text,
+    inputs=gr.Textbox(lines=6, placeholder="Paste text here..."),
+    outputs="html",
+    title="AI Text Detector",
+    description="Detects AI-generated text using ModernBERT ensemble and highlights AI-like vs Human-like sentences."
+)
+iface.launch()