um41r committed on
Commit
ac3de2e
·
verified ·
1 Parent(s): f6f27ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -71
app.py CHANGED
@@ -1,73 +1,50 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
3
  import torch
4
- import torch.nn.functional as F
5
 
6
- MODEL_ID = "yhzhang3/detect-gpt"
7
-
8
- # --- Manually override labels since the model config has generic LABEL_0/LABEL_1 ---
9
- # label 0 = Human-written, label 1 = AI-generated (standard convention for detect-gpt)
10
- ID2LABEL = {0: "πŸ§‘ Human-Written", 1: "πŸ€– AI-Generated"}
11
 
12
  print("Loading model...")
13
- config = AutoConfig.from_pretrained(
14
- MODEL_ID,
15
- id2label=ID2LABEL,
16
- label2id={"Human-Written": 0, "AI-Generated": 1},
17
  )
18
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
19
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, config=config)
20
- model.eval()
21
-
22
- def detect(text: str, show_debug: bool):
23
- if not text.strip():
24
- return "⚠️ Please enter some text.", {}, ""
25
-
26
- inputs = tokenizer(
27
- text,
28
- return_tensors="pt",
29
- truncation=True,
30
- max_length=512,
31
- padding=True,
32
- )
33
 
34
- token_count = inputs["input_ids"].shape[1]
 
 
 
 
35
 
36
- with torch.no_grad():
37
- outputs = model(**inputs)
38
- logits = outputs.logits # shape: [1, 2]
39
-
40
- probs = F.softmax(logits, dim=-1).squeeze()
41
-
42
- scores = {
43
- "Human-Written": round(float(probs[0]), 4),
44
- "AI-Generated": round(float(probs[1]), 4),
45
- }
46
 
47
- best_label = max(scores, key=scores.get)
48
- confidence = scores[best_label]
 
 
49
 
50
- emoji = "πŸ€–" if "AI" in best_label else "πŸ§‘"
51
- verdict = f"**{emoji} {best_label}** β€” {confidence:.1%} confidence"
52
 
53
- debug_info = ""
54
- if show_debug:
55
- debug_info = (
56
- f"**Raw logits:** {logits.squeeze().tolist()}\n\n"
57
- f"**Softmax probs:** {probs.tolist()}\n\n"
58
- f"**Tokens used:** {token_count} / 512\n\n"
59
- f"**Model num_labels:** {model.config.num_labels}"
60
- )
61
 
62
- return verdict, scores, debug_info
63
 
64
 
65
- with gr.Blocks(title="Detect-GPT") as demo:
66
  gr.Markdown("""
67
- # πŸ” Detect-GPT β€” AI vs Human Text Classifier
68
- Paste text to check if it was written by a **human** or **AI**.
69
- Model: [`yhzhang3/detect-gpt`](https://huggingface.co/yhzhang3/detect-gpt) (BERT fine-tuned for sequence classification)
70
- > βœ‚οΈ Input is **truncated to 512 tokens**. Use complete paragraphs for best accuracy.
 
71
  """)
72
 
73
  with gr.Row():
@@ -77,7 +54,6 @@ with gr.Blocks(title="Detect-GPT") as demo:
77
  placeholder="Paste your text here...",
78
  lines=10,
79
  )
80
- show_debug = gr.Checkbox(label="Show debug info (logits, token count)", value=False)
81
  with gr.Row():
82
  clear_btn = gr.Button("Clear")
83
  submit_btn = gr.Button("Analyze", variant="primary")
@@ -85,26 +61,18 @@ with gr.Blocks(title="Detect-GPT") as demo:
85
  with gr.Column(scale=1):
86
  verdict_out = gr.Markdown(label="Verdict")
87
  scores_out = gr.Label(label="Confidence", num_top_classes=2)
88
- debug_out = gr.Markdown(label="Debug Info")
89
 
90
  gr.Examples(
91
  examples=[
92
- ["The mitochondria is the powerhouse of the cell. It generates ATP via cellular respiration in the inner mitochondrial membrane."],
93
- ["In an era defined by the emergent capabilities of large language models, the epistemological boundaries between human cognition and machine-generated text have become increasingly indistinct."],
94
- ["hey so i forgot to send the report lol, will do it tmrw morning promise"],
95
- ["The results of our experiment confirmed the hypothesis. We observed a significant increase in reaction rate as temperature rose from 25Β°C to 75Β°C."],
96
  ],
97
  inputs=text_input,
98
  )
99
 
100
- submit_btn.click(
101
- fn=detect,
102
- inputs=[text_input, show_debug],
103
- outputs=[verdict_out, scores_out, debug_out],
104
- )
105
- clear_btn.click(
106
- fn=lambda: ("", False, "", None, ""),
107
- outputs=[text_input, show_debug, verdict_out, scores_out, debug_out],
108
- )
109
 
110
- demo.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  import torch
 
4
 
5
+ MODEL_ID = "openai-community/roberta-base-openai-detector"
 
 
 
 
6
 
7
  print("Loading model...")
8
+ classifier = pipeline(
9
+ "text-classification",
10
+ model=MODEL_ID,
11
+ device=0 if torch.cuda.is_available() else -1,
12
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # Model labels: "Real" = Human-written, "Fake" = AI-generated
15
+ LABEL_MAP = {
16
+ "Real": ("πŸ§‘ Human-Written", "#2ecc71"),
17
+ "Fake": ("πŸ€– AI-Generated", "#e74c3c"),
18
+ }
19
 
20
+ def detect(text: str):
21
+ if not text.strip():
22
+ return "⚠️ Please enter some text.", {}
 
 
 
 
 
 
 
23
 
24
+ result = classifier(text, truncation=True, max_length=512)[0]
25
+ label = result["label"] # "Real" or "Fake"
26
+ score = result["score"] # confidence for the predicted label
27
+ alt_score = 1.0 - score # confidence for the other label
28
 
29
+ display_label, _ = LABEL_MAP[label]
30
+ verdict = f"**{display_label}** β€” {score:.1%} confidence"
31
 
32
+ # Build scores dict with friendly names for gr.Label
33
+ if label == "Real":
34
+ scores = {"πŸ§‘ Human-Written": round(score, 4), "πŸ€– AI-Generated": round(alt_score, 4)}
35
+ else:
36
+ scores = {"πŸ€– AI-Generated": round(score, 4), "πŸ§‘ Human-Written": round(alt_score, 4)}
 
 
 
37
 
38
+ return verdict, scores
39
 
40
 
41
+ with gr.Blocks(title="AI Text Detector") as demo:
42
  gr.Markdown("""
43
+ # πŸ” AI Text Detector
44
+ Paste any text to check if it was written by a **human** or an **AI**.
45
+ Model: [`openai-community/roberta-base-openai-detector`](https://huggingface.co/openai-community/roberta-base-openai-detector)
46
+ *(RoBERTa fine-tuned by OpenAI on GPT-2 outputs)*
47
+ > βœ‚οΈ Text is truncated to **512 tokens**. Use full paragraphs for best results.
48
  """)
49
 
50
  with gr.Row():
 
54
  placeholder="Paste your text here...",
55
  lines=10,
56
  )
 
57
  with gr.Row():
58
  clear_btn = gr.Button("Clear")
59
  submit_btn = gr.Button("Analyze", variant="primary")
 
61
  with gr.Column(scale=1):
62
  verdict_out = gr.Markdown(label="Verdict")
63
  scores_out = gr.Label(label="Confidence", num_top_classes=2)
 
64
 
65
  gr.Examples(
66
  examples=[
67
+ ["hey so i forgot to send the report lol, will do it tmrw morning i promise"],
68
+ ["The mitochondria is the powerhouse of the cell, generating ATP through oxidative phosphorylation in the inner mitochondrial membrane."],
69
+ ["In an era defined by the emergent capabilities of large language models, the epistemological boundaries between human and machine-generated text have become increasingly indistinct, necessitating robust detection frameworks."],
70
+ ["I honestly don't know what to do anymore. Everything feels so overwhelming and I just needed to write this down somewhere."],
71
  ],
72
  inputs=text_input,
73
  )
74
 
75
+ submit_btn.click(fn=detect, inputs=text_input, outputs=[verdict_out, scores_out])
76
+ clear_btn.click(fn=lambda: ("", None), outputs=[text_input, scores_out])
 
 
 
 
 
 
 
77
 
78
+ demo.launch()