VictorM-Coder committed on
Commit
f2f742a
·
verified ·
1 Parent(s): eba685c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -21
app.py CHANGED
@@ -10,7 +10,6 @@ import gradio as gr
10
  # -----------------------------
11
  MODEL_NAME = "openai-community/roberta-base-openai-detector"
12
 
13
-
14
  # -----------------------------
15
  # LOAD MODEL
16
  # -----------------------------
@@ -22,10 +21,17 @@ model.to(device).eval()
22
 
23
 
24
  # -----------------------------
25
- # LINE SPLITTER (SAFE, FIXED)
26
  # -----------------------------
27
- def line_split(text):
28
- return [l.strip() for l in text.split("\n") if l.strip()]
 
 
 
 
 
 
 
29
 
30
 
31
  # -----------------------------
@@ -35,17 +41,17 @@ def classify_text(text):
35
  if not text.strip():
36
  return "⚠️ Please enter some text.", None, None
37
 
38
- lines = line_split(text)
39
- if not lines:
40
  return "⚠️ No content detected.", None, None
41
 
42
- # Tokenize line by line → SAFE
43
  inputs = tokenizer(
44
- lines,
45
  return_tensors="pt",
46
  padding=True,
47
- truncation=True,
48
- max_length=512 # SAFE for RoBERTa
49
  ).to(device)
50
 
51
  with torch.no_grad():
@@ -54,31 +60,31 @@ def classify_text(text):
54
  preds = torch.argmax(probs, dim=-1).cpu()
55
 
56
  results = []
57
- highlighted_lines = []
58
 
59
- for i, line in enumerate(lines):
60
  pred = preds[i].item()
61
  conf = probs[i, pred].item()
62
 
63
- # For this model: 1 = AI, 0 = Human
64
  label = "AI" if pred == 1 else "Human"
65
  conf_text = f"{conf:.2f}"
66
 
67
- results.append([line, label, conf_text])
68
 
69
  if label == "AI":
70
- highlighted_lines.append(f"<p style='color:red; font-weight:bold'>{line}</p>")
71
  else:
72
- highlighted_lines.append(f"<p style='color:green; font-weight:bold'>{line}</p>")
73
 
74
  # -----------------------------
75
  # DOCUMENT AI SCORE
76
  # -----------------------------
77
  avg = torch.mean(probs, dim=0)
78
- ai_percent = avg[1].item() * 100 # class 1 = AI
79
 
80
- highlighted_html = "\n".join(highlighted_lines)
81
- df = pd.DataFrame(results, columns=["Line", "Classification", "Confidence"])
82
 
83
  return f"⚖️ Document AI Likelihood: {ai_percent:.1f}%", highlighted_html, df
84
 
@@ -87,7 +93,7 @@ def classify_text(text):
87
  # GRADIO UI
88
  # -----------------------------
89
  with gr.Blocks() as demo:
90
- gr.Markdown("## 🧠 Writenix AI Detector (Line-Level, Stable Version)")
91
 
92
  text_input = gr.Textbox(
93
  label="Enter text",
@@ -99,7 +105,7 @@ with gr.Blocks() as demo:
99
 
100
  ai_score = gr.Label(label="Overall AI Likelihood")
101
  highlighted = gr.HTML()
102
- table = gr.Dataframe(headers=["Line", "Classification", "Confidence"], wrap=True)
103
 
104
  classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])
105
 
 
10
  # -----------------------------
11
  MODEL_NAME = "openai-community/roberta-base-openai-detector"
12
 
 
13
  # -----------------------------
14
  # LOAD MODEL
15
  # -----------------------------
 
21
 
22
 
23
  # -----------------------------
24
+ # SENTENCE SPLITTER (SAFE)
25
  # -----------------------------
26
+ def sentence_split(text):
27
+ # Replace newlines with periods to avoid broken sentences
28
+ text = text.replace("\n", ". ")
29
+
30
+ # Regex split on . ! ? but keep them
31
+ sentences = re.split(r'(?<=[.!?])\s+', text)
32
+
33
+ # Clean and filter
34
+ return [s.strip() for s in sentences if s.strip()]
35
 
36
 
37
  # -----------------------------
 
41
  if not text.strip():
42
  return "⚠️ Please enter some text.", None, None
43
 
44
+ sentences = sentence_split(text)
45
+ if not sentences:
46
  return "⚠️ No content detected.", None, None
47
 
48
+ # Tokenize per sentence
49
  inputs = tokenizer(
50
+ sentences,
51
  return_tensors="pt",
52
  padding=True,
53
+ truncation=True,
54
+ max_length=512
55
  ).to(device)
56
 
57
  with torch.no_grad():
 
60
  preds = torch.argmax(probs, dim=-1).cpu()
61
 
62
  results = []
63
+ highlighted_sentences = []
64
 
65
+ for i, sentence in enumerate(sentences):
66
  pred = preds[i].item()
67
  conf = probs[i, pred].item()
68
 
69
+ # Model: 1 = AI, 0 = Human
70
  label = "AI" if pred == 1 else "Human"
71
  conf_text = f"{conf:.2f}"
72
 
73
+ results.append([sentence, label, conf_text])
74
 
75
  if label == "AI":
76
+ highlighted_sentences.append(f"<p style='color:red; font-weight:bold'>{sentence}</p>")
77
  else:
78
+ highlighted_sentences.append(f"<p style='color:green; font-weight:bold'>{sentence}</p>")
79
 
80
  # -----------------------------
81
  # DOCUMENT AI SCORE
82
  # -----------------------------
83
  avg = torch.mean(probs, dim=0)
84
+ ai_percent = avg[1].item() * 100
85
 
86
+ highlighted_html = "\n".join(highlighted_sentences)
87
+ df = pd.DataFrame(results, columns=["Sentence", "Classification", "Confidence"])
88
 
89
  return f"⚖️ Document AI Likelihood: {ai_percent:.1f}%", highlighted_html, df
90
 
 
93
  # GRADIO UI
94
  # -----------------------------
95
  with gr.Blocks() as demo:
96
+ gr.Markdown("## 🧠 Writenix AI Detector (Sentence-Level, Stable Version)")
97
 
98
  text_input = gr.Textbox(
99
  label="Enter text",
 
105
 
106
  ai_score = gr.Label(label="Overall AI Likelihood")
107
  highlighted = gr.HTML()
108
+ table = gr.Dataframe(headers=["Sentence", "Classification", "Confidence"], wrap=True)
109
 
110
  classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])
111