VictorM-Coder committed on
Commit
eba685c
·
verified ·
1 Parent(s): ca7583b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -44
app.py CHANGED
@@ -6,10 +6,11 @@ import pandas as pd
6
  import gradio as gr
7
 
8
  # -----------------------------
9
- # STRONGEST AVAILABLE PUBLIC MODEL (WORKING)
10
  # -----------------------------
11
  MODEL_NAME = "openai-community/roberta-base-openai-detector"
12
 
 
13
  # -----------------------------
14
  # LOAD MODEL
15
  # -----------------------------
@@ -21,11 +22,10 @@ model.to(device).eval()
21
 
22
 
23
  # -----------------------------
24
- # PARAGRAPH SPLITTER
25
  # -----------------------------
26
- def paragraph_split(text):
27
- paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
28
- return paragraphs
29
 
30
 
31
  # -----------------------------
@@ -35,81 +35,71 @@ def classify_text(text):
35
  if not text.strip():
36
  return "⚠️ Please enter some text.", None, None
37
 
38
- paragraphs = paragraph_split(text)
39
- if not paragraphs:
40
- return "⚠️ No paragraphs detected.", None, None
41
 
42
- # Tokenize paragraphs
43
  inputs = tokenizer(
44
- paragraphs,
45
  return_tensors="pt",
46
  padding=True,
47
- truncation=True,
48
- max_length=model.config.max_position_embeddings
49
  ).to(device)
50
 
51
- # Predict
52
  with torch.no_grad():
53
  logits = model(**inputs).logits
54
  probs = F.softmax(logits, dim=-1).cpu()
55
  preds = torch.argmax(probs, dim=-1).cpu()
56
 
57
- # -----------------------------
58
- # BUILD RESULTS
59
- # -----------------------------
60
  results = []
61
- highlighted_paragraphs = []
62
-
63
- for i, p in enumerate(paragraphs):
64
- pred_label = preds[i].item()
65
- confidence = probs[i, pred_label].item()
66
 
67
- label = "AI" if pred_label == 1 else "Human"
68
- # NOTE: roberta-base-openai-detector → label 1 = Fake (AI), 0 = Real (Human)
 
69
 
70
- conf_text = f"{confidence:.2f}"
 
 
71
 
72
- results.append([p, label, conf_text])
73
 
74
  if label == "AI":
75
- highlighted_paragraphs.append(
76
- f"<p style='color:red; font-weight:bold; margin-bottom:10px'>{p}</p>"
77
- )
78
  else:
79
- highlighted_paragraphs.append(
80
- f"<p style='color:green; font-weight:bold; margin-bottom:10px'>{p}</p>"
81
- )
82
 
83
  # -----------------------------
84
- # DOCUMENT LEVEL SCORE
85
  # -----------------------------
86
  avg = torch.mean(probs, dim=0)
87
- ai_likelihood = avg[1].item() * 100 # class 1 = AI
88
 
89
- highlighted_html = "\n".join(highlighted_paragraphs)
90
- df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])
91
 
92
- return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df
93
 
94
 
95
  # -----------------------------
96
  # GRADIO UI
97
  # -----------------------------
98
  with gr.Blocks() as demo:
99
- gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")
100
 
101
- with gr.Row():
102
- text_input = gr.Textbox(
103
- label="Enter text",
104
- lines=14,
105
- placeholder="Paste your essay, article, or content here…"
106
- )
107
 
108
  classify_btn = gr.Button("🚀 Detect AI")
109
 
110
  ai_score = gr.Label(label="Overall AI Likelihood")
111
  highlighted = gr.HTML()
112
- table = gr.Dataframe(headers=["Paragraph", "Classification", "Confidence"], wrap=True)
113
 
114
  classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])
115
 
 
6
  import gradio as gr
7
 
8
  # -----------------------------
9
+ # WORKING PUBLIC AI DETECTOR
10
  # -----------------------------
11
  MODEL_NAME = "openai-community/roberta-base-openai-detector"
12
 
13
+
14
  # -----------------------------
15
  # LOAD MODEL
16
  # -----------------------------
 
22
 
23
 
24
  # -----------------------------
25
+ # LINE SPLITTER (SAFE, FIXED)
26
  # -----------------------------
27
+ def line_split(text):
28
+ return [l.strip() for l in text.split("\n") if l.strip()]
 
29
 
30
 
31
  # -----------------------------
 
35
  if not text.strip():
36
  return "⚠️ Please enter some text.", None, None
37
 
38
+ lines = line_split(text)
39
+ if not lines:
40
+ return "⚠️ No content detected.", None, None
41
 
42
+ # Tokenize line by line → SAFE
43
  inputs = tokenizer(
44
+ lines,
45
  return_tensors="pt",
46
  padding=True,
47
+ truncation=True,
48
+ max_length=512 # SAFE for RoBERTa
49
  ).to(device)
50
 
 
51
  with torch.no_grad():
52
  logits = model(**inputs).logits
53
  probs = F.softmax(logits, dim=-1).cpu()
54
  preds = torch.argmax(probs, dim=-1).cpu()
55
 
 
 
 
56
  results = []
57
+ highlighted_lines = []
 
 
 
 
58
 
59
+ for i, line in enumerate(lines):
60
+ pred = preds[i].item()
61
+ conf = probs[i, pred].item()
62
 
63
+ # For this model: 1 = AI, 0 = Human
64
+ label = "AI" if pred == 1 else "Human"
65
+ conf_text = f"{conf:.2f}"
66
 
67
+ results.append([line, label, conf_text])
68
 
69
  if label == "AI":
70
+ highlighted_lines.append(f"<p style='color:red; font-weight:bold'>{line}</p>")
 
 
71
  else:
72
+ highlighted_lines.append(f"<p style='color:green; font-weight:bold'>{line}</p>")
 
 
73
 
74
  # -----------------------------
75
+ # DOCUMENT AI SCORE
76
  # -----------------------------
77
  avg = torch.mean(probs, dim=0)
78
+ ai_percent = avg[1].item() * 100 # class 1 = AI
79
 
80
+ highlighted_html = "\n".join(highlighted_lines)
81
+ df = pd.DataFrame(results, columns=["Line", "Classification", "Confidence"])
82
 
83
+ return f"⚖️ Document AI Likelihood: {ai_percent:.1f}%", highlighted_html, df
84
 
85
 
86
  # -----------------------------
87
  # GRADIO UI
88
  # -----------------------------
89
  with gr.Blocks() as demo:
90
+ gr.Markdown("## 🧠 Writenix AI Detector (Line-Level, Stable Version)")
91
 
92
+ text_input = gr.Textbox(
93
+ label="Enter text",
94
+ lines=14,
95
+ placeholder="Paste your essay, article, or content here…"
96
+ )
 
97
 
98
  classify_btn = gr.Button("🚀 Detect AI")
99
 
100
  ai_score = gr.Label(label="Overall AI Likelihood")
101
  highlighted = gr.HTML()
102
+ table = gr.Dataframe(headers=["Line", "Classification", "Confidence"], wrap=True)
103
 
104
  classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])
105