VictorM-Coder committed on
Commit
b0b36a6
·
verified ·
1 Parent(s): c40b953

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -13
app.py CHANGED
@@ -1,5 +1,7 @@
 
1
  import re
2
  from typing import List, Tuple
 
3
  import gradio as gr
4
  from transformers import pipeline
5
 
@@ -8,6 +10,9 @@ from transformers import pipeline
8
  # -----------------------------
9
 
10
  MODEL_ID = "fakespot-ai/roberta-base-ai-text-detection-v1"
 
 
 
11
  clf = pipeline("text-classification", model=MODEL_ID)
12
 
13
  def clean_text(s: str) -> str:
@@ -31,20 +36,22 @@ def chunk_text(text: str, max_words: int = 300) -> List[str]:
31
  def detect_ai(text: str) -> Tuple[str, float, str]:
32
  """
33
  Returns (label, score_float, explanation)
 
 
 
34
  """
35
  if not text or not text.strip():
36
  return "—", 0.0, "Please paste some text to analyze."
37
 
38
  chunks = [clean_text(c) for c in chunk_text(text, max_words=300)]
39
- # Run model in batch for speed
 
40
  preds = clf(chunks)
41
 
42
- # Aggregate: average confidence across chunks for the detected class
43
- # The model returns a label per chunk; we map AI=1, Human=0 and average
44
  ai_probs = []
45
  for p in preds:
46
- # Some models return 'AI'/'Human' labels; normalize
47
- label = p.get("label", "").upper()
48
  score = float(p.get("score", 0.0))
49
  ai_prob = score if label.startswith("AI") else (1.0 - score)
50
  ai_probs.append(ai_prob)
@@ -52,9 +59,7 @@ def detect_ai(text: str) -> Tuple[str, float, str]:
52
  mean_ai = sum(ai_probs) / len(ai_probs)
53
  label = "AI" if mean_ai >= 0.5 else "Human"
54
 
55
- # Lightweight heuristic explanation (no extra LLM needed)
56
  explanation = build_explanation(text, mean_ai, len(chunks))
57
-
58
  return label, float(mean_ai), explanation
59
 
60
  def build_explanation(text: str, ai_prob: float, n_chunks: int) -> str:
@@ -63,7 +68,10 @@ def build_explanation(text: str, ai_prob: float, n_chunks: int) -> str:
63
  words = [w for w in words if w.strip()]
64
  sentences = [s for s in sentences if s.strip()]
65
 
66
- avg_len = (sum(len(s.split()) for s in sentences) / max(1, len(sentences))) if sentences else 0
 
 
 
67
  vocab = set(w.lower() for w in words)
68
  ttr = len(vocab) / max(1, len(words)) # type-token ratio
69
 
@@ -92,7 +100,7 @@ def build_explanation(text: str, ai_prob: float, n_chunks: int) -> str:
92
  return (
93
  f"Overall this text is estimated to be {ai_prob:.2%} likely AI-generated. "
94
  f"Notable cues: " + "; ".join(cues) + ". "
95
- "Remember: detectors can be wrong—use results as a hint, not proof."
96
  )
97
 
98
  # -----------------------------
@@ -100,21 +108,31 @@ def build_explanation(text: str, ai_prob: float, n_chunks: int) -> str:
100
  # -----------------------------
101
 
102
  with gr.Blocks(title="AI Text Detector") as demo:
103
- gr.Markdown("## 🕵️ AI Text Detector (Simple)\nPaste text and get an approximate AI-likeness score.\n"
104
- "> Model: `fakespot-ai/roberta-base-ai-text-detection-v1`")
 
 
 
105
 
106
  with gr.Row():
107
  inp = gr.Textbox(label="Input Text", lines=14, placeholder="Paste your text here...")
 
108
  with gr.Row():
109
  label_out = gr.Label(label="Predicted Class")
110
  score_out = gr.Slider(label="AI Likelihood", minimum=0.0, maximum=1.0, step=0.001, interactive=False)
 
111
  explain = gr.Textbox(label="Explanation", lines=6)
112
 
113
- def _run(t):
114
  label, score, expl = detect_ai(t)
 
115
  return {label_out: {label: 1.0}, score_out: score, explain: expl}
116
 
117
  gr.Button("Analyze").click(_run, inputs=inp, outputs=[label_out, score_out, explain])
118
 
119
  if __name__ == "__main__":
120
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
 
 
 
 
 
1
+ import os
2
  import re
3
  from typing import List, Tuple
4
+
5
  import gradio as gr
6
  from transformers import pipeline
7
 
 
10
  # -----------------------------
11
 
12
  MODEL_ID = "fakespot-ai/roberta-base-ai-text-detection-v1"
13
+
14
+ # On a CPU-only Space you can pin the device explicitly by using the commented-out call below (device=-1).
15
+ # clf = pipeline("text-classification", model=MODEL_ID, device=-1)
16
  clf = pipeline("text-classification", model=MODEL_ID)
17
 
18
  def clean_text(s: str) -> str:
 
36
  def detect_ai(text: str) -> Tuple[str, float, str]:
37
  """
38
  Returns (label, score_float, explanation)
39
+ - label: "AI" or "Human"
40
+ - score_float: mean AI likelihood in [0,1]
41
+ - explanation: short narrative with a few heuristic cues
42
  """
43
  if not text or not text.strip():
44
  return "—", 0.0, "Please paste some text to analyze."
45
 
46
  chunks = [clean_text(c) for c in chunk_text(text, max_words=300)]
47
+
48
+ # Batch for speed and lower overhead
49
  preds = clf(chunks)
50
 
51
+ # Aggregate AI likelihood: if a chunk label is 'AI', use score; if 'Human', use (1-score)
 
52
  ai_probs = []
53
  for p in preds:
54
+ label = str(p.get("label", "")).upper()
 
55
  score = float(p.get("score", 0.0))
56
  ai_prob = score if label.startswith("AI") else (1.0 - score)
57
  ai_probs.append(ai_prob)
 
59
  mean_ai = sum(ai_probs) / len(ai_probs)
60
  label = "AI" if mean_ai >= 0.5 else "Human"
61
 
 
62
  explanation = build_explanation(text, mean_ai, len(chunks))
 
63
  return label, float(mean_ai), explanation
64
 
65
  def build_explanation(text: str, ai_prob: float, n_chunks: int) -> str:
 
68
  words = [w for w in words if w.strip()]
69
  sentences = [s for s in sentences if s.strip()]
70
 
71
+ avg_len = (
72
+ sum(len(s.split()) for s in sentences) / max(1, len(sentences))
73
+ if sentences else 0
74
+ )
75
  vocab = set(w.lower() for w in words)
76
  ttr = len(vocab) / max(1, len(words)) # type-token ratio
77
 
 
100
  return (
101
  f"Overall this text is estimated to be {ai_prob:.2%} likely AI-generated. "
102
  f"Notable cues: " + "; ".join(cues) + ". "
103
+ "Reminder: detectors can be wrong—use results as a hint, not proof."
104
  )
105
 
106
  # -----------------------------
 
108
  # -----------------------------
109
 
110
  with gr.Blocks(title="AI Text Detector") as demo:
111
+ gr.Markdown(
112
+ "## 🕵️ AI Text Detector (Simple)\n"
113
+ "Paste text and get an approximate AI-likeness score.\n\n"
114
+ "> Model: `fakespot-ai/roberta-base-ai-text-detection-v1`"
115
+ )
116
 
117
  with gr.Row():
118
  inp = gr.Textbox(label="Input Text", lines=14, placeholder="Paste your text here...")
119
+
120
  with gr.Row():
121
  label_out = gr.Label(label="Predicted Class")
122
  score_out = gr.Slider(label="AI Likelihood", minimum=0.0, maximum=1.0, step=0.001, interactive=False)
123
+
124
  explain = gr.Textbox(label="Explanation", lines=6)
125
 
126
+ def _run(t: str):
127
  label, score, expl = detect_ai(t)
128
+ # gr.Label expects a dict of {class_name: confidence} for pretty display
129
  return {label_out: {label: 1.0}, score_out: score, explain: expl}
130
 
131
  gr.Button("Analyze").click(_run, inputs=inp, outputs=[label_out, score_out, explain])
132
 
133
  if __name__ == "__main__":
134
+ # For Spaces, PORT is provided by the environment
135
+ demo.queue(concurrency_count=1).launch(
136
+ server_name="0.0.0.0",
137
+ server_port=int(os.getenv("PORT", 7860))
138
+ )