VictorM-Coder committed on
Commit
91f4244
·
verified ·
1 Parent(s): 3535cf7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -6,9 +6,9 @@ import pandas as pd
6
  import gradio as gr
7
 
8
  # -----------------------------
9
- # MODEL
10
  # -----------------------------
11
- MODEL_NAME = "openai-community/roberta-base-openai-detector"
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
  dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
@@ -59,15 +59,18 @@ def sentence_split(text: str):
59
  return [_restore(s).strip() for s in sentences if s.strip()]
60
 
61
  # -----------------------------
62
- # CLASSIFY SENTENCE-BY-SENTENCE
63
  # -----------------------------
64
  def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
65
  sents = sentence_split(text)
66
  if not sents:
67
  return "⚠️ Please paste some text.", None, None
68
 
 
 
 
69
  inputs = tokenizer(
70
- sents, return_tensors="pt", padding=True, truncation=True, max_length=max_len
71
  ).to(device)
72
 
73
  with torch.no_grad():
@@ -76,8 +79,8 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
76
 
77
  rows = []
78
  highlights = []
79
- for i, s in enumerate(sents, start=1):
80
- ai_p = float(probs[i-1, 1].item())
81
  label = "AI" if ai_p >= threshold else "Human"
82
  pct = f"{ai_p*100:.1f}%"
83
 
@@ -89,24 +92,22 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
89
  else:
90
  color = "#b80d0d" # red
91
 
92
- # avoid backslashes inside the f-string expression
93
- normalized = re.sub(r"\s+", " ", s)
94
-
95
  highlights.append(
96
  "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
97
  f"<strong style='color:{color}'>[{pct} {label}]</strong> {normalized}</div>"
98
  )
99
- rows.append([i, s, round(ai_p, 4), label])
100
 
101
  html = "\n".join(highlights)
102
  df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
103
- return "Done ✅ (sentence-by-sentence only)", html, df
104
 
105
  # -----------------------------
106
  # GRADIO UI (minimal)
107
  # -----------------------------
108
  with gr.Blocks() as demo:
109
- gr.Markdown("### 🧠 Sentence-by-Sentence AI Check")
110
 
111
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
112
  threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")
 
6
  import gradio as gr
7
 
8
  # -----------------------------
9
+ # MODEL (Fakespot 2025)
10
  # -----------------------------
11
+ MODEL_NAME = "fakespot-ai/roberta-base-ai-text-detection-v1"
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
  dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
 
59
  return [_restore(s).strip() for s in sentences if s.strip()]
60
 
61
  # -----------------------------
62
+ # CLASSIFY SENTENCE-BY-SENTENCE (Fakespot: id2label[1] == "AI")
63
  # -----------------------------
64
  def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
65
  sents = sentence_split(text)
66
  if not sents:
67
  return "⚠️ Please paste some text.", None, None
68
 
69
+ # (Optional) light cleaning similar to model card guidance
70
+ clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
71
+
72
  inputs = tokenizer(
73
+ clean_sents, return_tensors="pt", padding=True, truncation=True, max_length=max_len
74
  ).to(device)
75
 
76
  with torch.no_grad():
 
79
 
80
  rows = []
81
  highlights = []
82
+ for i, orig in enumerate(sents, start=1):
83
+ ai_p = float(probs[i-1, 1].item()) # Fakespot: 1 == AI
84
  label = "AI" if ai_p >= threshold else "Human"
85
  pct = f"{ai_p*100:.1f}%"
86
 
 
92
  else:
93
  color = "#b80d0d" # red
94
 
95
+ normalized = re.sub(r"\s+", " ", orig)
 
 
96
  highlights.append(
97
  "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
98
  f"<strong style='color:{color}'>[{pct} {label}]</strong> {normalized}</div>"
99
  )
100
+ rows.append([i, orig, round(ai_p, 4), label])
101
 
102
  html = "\n".join(highlights)
103
  df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
104
+ return "Done ✅ (Fakespot detector)", html, df
105
 
106
  # -----------------------------
107
  # GRADIO UI (minimal)
108
  # -----------------------------
109
  with gr.Blocks() as demo:
110
+ gr.Markdown("### 🧠 Sentence-by-Sentence AI Check (Fakespot)")
111
 
112
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
113
  threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")