VictorM-Coder committed on
Commit
9267b26
·
verified ·
1 Parent(s): 76cdf27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -28
app.py CHANGED
@@ -5,75 +5,113 @@ import re
5
  import pandas as pd
6
  import gradio as gr
7
 
8
- MODEL_NAME = "dejanseo/ai-cop"
9
-
10
- # --- Load model ---
 
 
 
 
 
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
12
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
  dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
14
  model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
15
  model.to(device).eval()
16
 
17
- def sent_tokenize(text):
18
- return [s for s in re.split(r'(?<=[\.!?])\s+', text.strip()) if s]
19
 
20
- # --- Inference function ---
 
 
 
 
 
 
 
 
 
 
21
  def classify_text(text):
22
  if not text.strip():
23
  return "⚠️ Please enter some text.", None, None
24
 
25
- sentences = sent_tokenize(text)
26
- if not sentences:
27
- return "⚠️ No sentences detected.", None, None
28
 
 
29
  inputs = tokenizer(
30
- sentences,
31
  return_tensors="pt",
32
  padding=True,
33
  truncation=True,
34
  max_length=model.config.max_position_embeddings
35
  ).to(device)
36
 
 
37
  with torch.no_grad():
38
  logits = model(**inputs).logits
39
  probs = F.softmax(logits, dim=-1).cpu()
40
  preds = torch.argmax(probs, dim=-1).cpu()
41
 
 
 
 
42
  results = []
43
- highlighted_sentences = []
44
- for i, s in enumerate(sentences):
45
- p = preds[i].item()
46
- conf = probs[i, p].item()
47
- label = "AI" if p == 0 else "Human"
 
 
 
48
 
49
- results.append([s, label, f"{conf:.2f}"])
 
 
50
  if label == "AI":
51
- highlighted_sentences.append(f"<span style='color:red; font-weight:bold'>{s}</span>")
 
 
52
  else:
53
- highlighted_sentences.append(f"<span style='color:green; font-weight:bold'>{s}</span>")
 
 
54
 
55
- # Overall AI likelihood
 
 
56
  avg = torch.mean(probs, dim=0)
57
- model_ai = avg[0].item() * 100
 
 
 
58
 
59
- highlighted_text = " ".join(highlighted_sentences)
60
- df = pd.DataFrame(results, columns=["Sentence", "Classification", "Confidence"])
61
- return f"⚖️ AI Likelihood: {model_ai:.1f}%", highlighted_text, df
62
 
63
- # --- Gradio Interface ---
 
 
 
64
  with gr.Blocks() as demo:
65
- gr.Markdown("## 🧠 AI Article Detection by Writenix")
66
 
67
  with gr.Row():
68
- text_input = gr.Textbox(label="Enter text", lines=10, placeholder="Paste your text here…")
 
 
 
 
69
 
70
- classify_btn = gr.Button("Detect AI")
71
 
72
  ai_score = gr.Label(label="Overall AI Likelihood")
73
  highlighted = gr.HTML()
74
- table = gr.Dataframe(headers=["Sentence", "Classification", "Confidence"], wrap=True)
75
 
76
  classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])
77
 
 
78
  if __name__ == "__main__":
79
  demo.launch()
 
5
  import pandas as pd
6
  import gradio as gr
7
 
8
# Hugging Face Hub checkpoint used for AI-vs-human sequence classification.
MODEL_NAME = "Hello-SimpleAI/HC3-Plus-OpenAI-Detector"

# Tokenizer is fetched first, matching the checkpoint loaded below.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# bfloat16 is chosen only on a CUDA device that reports support for it;
# every other configuration uses float32.
if device.type == "cuda" and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float32

# Load the classifier in the selected dtype, move it to the device and
# switch it to evaluation mode for inference.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
model.to(device)
model.eval()
21
 
 
 
22
 
23
+ # -----------------------------
24
+ # PARAGRAPH TOKENIZER
25
+ # -----------------------------
26
def paragraph_split(text):
    """Split *text* into non-empty, whitespace-trimmed paragraphs.

    Each line of the input is treated as one paragraph. Line boundaries are
    located with ``str.splitlines()``, so LF, CRLF, a bare CR and the Unicode
    line/paragraph separators all end a paragraph. Splitting on LF alone
    would glue together text pasted with any other line ending.

    Args:
        text: Raw user input as a string.

    Returns:
        A list of stripped paragraph strings in input order. Blank and
        whitespace-only lines are dropped; empty input yields ``[]``.
    """
    trimmed_lines = (line.strip() for line in text.splitlines())
    return [paragraph for paragraph in trimmed_lines if paragraph]
29
+
30
+
31
+ # -----------------------------
32
+ # INFERENCE FUNCTION
33
+ # -----------------------------
34
def classify_text(text):
    """Classify every paragraph of *text* and summarise the whole document.

    The text is split with ``paragraph_split``; all paragraphs are tokenized
    and scored in a single batch by the module-level ``model``.

    Args:
        text: Raw user input from the textbox. ``None``, empty and
            whitespace-only values are treated as "no input".

    Returns:
        A 3-tuple ``(summary, highlighted_html, table)``:

        * ``summary`` - document-level AI likelihood message, or a warning
          message when there is nothing to classify.
        * ``highlighted_html`` - one ``<p>`` per paragraph, red for "AI" and
          green for "Human", with the paragraph text HTML-escaped; ``None``
          when there is nothing to classify.
        * ``table`` - ``pandas.DataFrame`` with columns ``Paragraph``,
          ``Classification`` and ``Confidence``; ``None`` when there is
          nothing to classify.
    """
    # Local import keeps this block self-contained; needed to escape user text.
    import html

    if not text or not text.strip():
        return "⚠️ Please enter some text.", None, None

    paragraphs = paragraph_split(text)
    if not paragraphs:
        return "⚠️ No paragraphs detected.", None, None

    # Never ask for more tokens than either the tokenizer or the model's
    # position table allows. On some architectures the position table is
    # larger than the usable sequence length, so the model value alone can
    # overshoot.
    max_length = min(tokenizer.model_max_length, model.config.max_position_embeddings)

    inputs = tokenizer(
        paragraphs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Upcast before softmax so probabilities are not rounded to bfloat16
    # precision when the model runs in reduced precision on GPU.
    probs = F.softmax(logits.float(), dim=-1).cpu()
    preds = probs.argmax(dim=-1).tolist()

    # NOTE(review): index 0 is treated as the "AI" class, as in the previous
    # checkpoint - confirm against model.config.id2label for this model.
    ai_index = 0

    results = []
    highlighted_paragraphs = []
    for paragraph, row, pred_label in zip(paragraphs, probs, preds):
        confidence = row[pred_label].item()
        label = "AI" if pred_label == ai_index else "Human"
        results.append([paragraph, label, f"{confidence:.2f}"])

        # Escape the user's text: it is rendered as raw HTML by the UI, so
        # unescaped "<", ">" or "&" would break the output or inject markup.
        color = "red" if label == "AI" else "green"
        highlighted_paragraphs.append(
            f"<p style='color:{color}; font-weight:bold; margin-bottom:10px'>"
            f"{html.escape(paragraph)}</p>"
        )

    # Document-level score: mean probability of the AI class over paragraphs.
    ai_likelihood = probs.mean(dim=0)[ai_index].item() * 100

    highlighted_html = "\n".join(highlighted_paragraphs)
    df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])

    return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df
 
 
92
 
93
+
94
+ # -----------------------------
95
+ # GRADIO INTERFACE
96
+ # -----------------------------
97
# NOTE(review): nesting reconstructed from a flattened view of the file -
# confirm that only the textbox sits inside the Row.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")

    # Input area.
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter text", lines=14,
            placeholder="Paste your essay, article, or content here…",
        )

    classify_btn = gr.Button("🚀 Detect AI")

    # Output widgets, created in the order classify_text returns its values.
    ai_score = gr.Label(label="Overall AI Likelihood")
    highlighted = gr.HTML()
    table = gr.Dataframe(
        headers=["Paragraph", "Classification", "Confidence"],
        wrap=True,
    )

    # Clicking the button runs the classifier and fills all three outputs.
    classify_btn.click(
        fn=classify_text,
        inputs=text_input,
        outputs=[ai_score, highlighted, table],
    )


# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()