um41r committed on
Commit
ac3de2e
·
verified ·
1 Parent(s): f6f27ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -71
app.py CHANGED
@@ -1,73 +1,50 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
3
  import torch
4
- import torch.nn.functional as F
5
 
6
- MODEL_ID = "yhzhang3/detect-gpt"
7
-
8
- # --- Manually override labels since the model config has generic LABEL_0/LABEL_1 ---
9
- # label 0 = Human-written, label 1 = AI-generated (standard convention for detect-gpt)
10
- ID2LABEL = {0: "πŸ§‘ Human-Written", 1: "πŸ€– AI-Generated"}
11
 
12
  print("Loading model...")
13
- config = AutoConfig.from_pretrained(
14
- MODEL_ID,
15
- id2label=ID2LABEL,
16
- label2id={"Human-Written": 0, "AI-Generated": 1},
17
  )
18
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
19
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, config=config)
20
- model.eval()
21
-
22
- def detect(text: str, show_debug: bool):
23
- if not text.strip():
24
- return "⚠️ Please enter some text.", {}, ""
25
-
26
- inputs = tokenizer(
27
- text,
28
- return_tensors="pt",
29
- truncation=True,
30
- max_length=512,
31
- padding=True,
32
- )
33
 
34
- token_count = inputs["input_ids"].shape[1]
 
 
 
 
35
 
36
- with torch.no_grad():
37
- outputs = model(**inputs)
38
- logits = outputs.logits # shape: [1, 2]
39
-
40
- probs = F.softmax(logits, dim=-1).squeeze()
41
-
42
- scores = {
43
- "Human-Written": round(float(probs[0]), 4),
44
- "AI-Generated": round(float(probs[1]), 4),
45
- }
46
 
47
- best_label = max(scores, key=scores.get)
48
- confidence = scores[best_label]
 
 
49
 
50
- emoji = "πŸ€–" if "AI" in best_label else "πŸ§‘"
51
- verdict = f"**{emoji} {best_label}** β€” {confidence:.1%} confidence"
52
 
53
- debug_info = ""
54
- if show_debug:
55
- debug_info = (
56
- f"**Raw logits:** {logits.squeeze().tolist()}\n\n"
57
- f"**Softmax probs:** {probs.tolist()}\n\n"
58
- f"**Tokens used:** {token_count} / 512\n\n"
59
- f"**Model num_labels:** {model.config.num_labels}"
60
- )
61
 
62
- return verdict, scores, debug_info
63
 
64
 
65
- with gr.Blocks(title="Detect-GPT") as demo:
66
  gr.Markdown("""
67
- # πŸ” Detect-GPT β€” AI vs Human Text Classifier
68
- Paste text to check if it was written by a **human** or **AI**.
69
- Model: [`yhzhang3/detect-gpt`](https://huggingface.co/yhzhang3/detect-gpt) (BERT fine-tuned for sequence classification)
70
- > βœ‚οΈ Input is **truncated to 512 tokens**. Use complete paragraphs for best accuracy.
 
71
  """)
72
 
73
  with gr.Row():
@@ -77,7 +54,6 @@ with gr.Blocks(title="Detect-GPT") as demo:
77
  placeholder="Paste your text here...",
78
  lines=10,
79
  )
80
- show_debug = gr.Checkbox(label="Show debug info (logits, token count)", value=False)
81
  with gr.Row():
82
  clear_btn = gr.Button("Clear")
83
  submit_btn = gr.Button("Analyze", variant="primary")
@@ -85,26 +61,18 @@ with gr.Blocks(title="Detect-GPT") as demo:
85
  with gr.Column(scale=1):
86
  verdict_out = gr.Markdown(label="Verdict")
87
  scores_out = gr.Label(label="Confidence", num_top_classes=2)
88
- debug_out = gr.Markdown(label="Debug Info")
89
 
90
  gr.Examples(
91
  examples=[
92
- ["The mitochondria is the powerhouse of the cell. It generates ATP via cellular respiration in the inner mitochondrial membrane."],
93
- ["In an era defined by the emergent capabilities of large language models, the epistemological boundaries between human cognition and machine-generated text have become increasingly indistinct."],
94
- ["hey so i forgot to send the report lol, will do it tmrw morning promise"],
95
- ["The results of our experiment confirmed the hypothesis. We observed a significant increase in reaction rate as temperature rose from 25Β°C to 75Β°C."],
96
  ],
97
  inputs=text_input,
98
  )
99
 
100
- submit_btn.click(
101
- fn=detect,
102
- inputs=[text_input, show_debug],
103
- outputs=[verdict_out, scores_out, debug_out],
104
- )
105
- clear_btn.click(
106
- fn=lambda: ("", False, "", None, ""),
107
- outputs=[text_input, show_debug, verdict_out, scores_out, debug_out],
108
- )
109
 
110
- demo.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  import torch
 
4
 
5
+ MODEL_ID = "openai-community/roberta-base-openai-detector"
 
 
 
 
6
 
7
  print("Loading model...")
8
+ classifier = pipeline(
9
+ "text-classification",
10
+ model=MODEL_ID,
11
+ device=0 if torch.cuda.is_available() else -1,
12
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # Model labels: "Real" = Human-written, "Fake" = AI-generated
15
+ LABEL_MAP = {
16
+ "Real": ("πŸ§‘ Human-Written", "#2ecc71"),
17
+ "Fake": ("πŸ€– AI-Generated", "#e74c3c"),
18
+ }
19
 
20
+ def detect(text: str):
21
+ if not text.strip():
22
+ return "⚠️ Please enter some text.", {}
 
 
 
 
 
 
 
23
 
24
+ result = classifier(text, truncation=True, max_length=512)[0]
25
+ label = result["label"] # "Real" or "Fake"
26
+ score = result["score"] # confidence for the predicted label
27
+ alt_score = 1.0 - score # confidence for the other label
28
 
29
+ display_label, _ = LABEL_MAP[label]
30
+ verdict = f"**{display_label}** β€” {score:.1%} confidence"
31
 
32
+ # Build scores dict with friendly names for gr.Label
33
+ if label == "Real":
34
+ scores = {"πŸ§‘ Human-Written": round(score, 4), "πŸ€– AI-Generated": round(alt_score, 4)}
35
+ else:
36
+ scores = {"πŸ€– AI-Generated": round(score, 4), "πŸ§‘ Human-Written": round(alt_score, 4)}
 
 
 
37
 
38
+ return verdict, scores
39
 
40
 
41
+ with gr.Blocks(title="AI Text Detector") as demo:
42
  gr.Markdown("""
43
+ # πŸ” AI Text Detector
44
+ Paste any text to check if it was written by a **human** or an **AI**.
45
+ Model: [`openai-community/roberta-base-openai-detector`](https://huggingface.co/openai-community/roberta-base-openai-detector)
46
+ *(RoBERTa fine-tuned by OpenAI on GPT-2 outputs)*
47
+ > βœ‚οΈ Text is truncated to **512 tokens**. Use full paragraphs for best results.
48
  """)
49
 
50
  with gr.Row():
 
54
  placeholder="Paste your text here...",
55
  lines=10,
56
  )
 
57
  with gr.Row():
58
  clear_btn = gr.Button("Clear")
59
  submit_btn = gr.Button("Analyze", variant="primary")
 
61
  with gr.Column(scale=1):
62
  verdict_out = gr.Markdown(label="Verdict")
63
  scores_out = gr.Label(label="Confidence", num_top_classes=2)
 
64
 
65
  gr.Examples(
66
  examples=[
67
+ ["hey so i forgot to send the report lol, will do it tmrw morning i promise"],
68
+ ["The mitochondria is the powerhouse of the cell, generating ATP through oxidative phosphorylation in the inner mitochondrial membrane."],
69
+ ["In an era defined by the emergent capabilities of large language models, the epistemological boundaries between human and machine-generated text have become increasingly indistinct, necessitating robust detection frameworks."],
70
+ ["I honestly don't know what to do anymore. Everything feels so overwhelming and I just needed to write this down somewhere."],
71
  ],
72
  inputs=text_input,
73
  )
74
 
75
+ submit_btn.click(fn=detect, inputs=text_input, outputs=[verdict_out, scores_out])
76
+ clear_btn.click(fn=lambda: ("", None), outputs=[text_input, scores_out])
 
 
 
 
 
 
 
77
 
78
+ demo.launch()