VictorM-Coder committed on
Commit
6c56aca
·
verified ·
1 Parent(s): 958e345

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
- import nltk
5
-
6
- nltk.download("punkt")
7
 
8
  # Load model
9
  MODEL = "roberta-base-openai-detector"
@@ -11,10 +9,13 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL)
11
  model = AutoModelForSequenceClassification.from_pretrained(MODEL)
12
 
13
  def detect_ai(text):
14
- sentences = nltk.sent_tokenize(text)
 
15
  results = []
16
 
17
  for sent in sentences:
 
 
18
  inputs = tokenizer(sent, return_tensors="pt", truncation=True, max_length=512)
19
  with torch.no_grad():
20
  outputs = model(**inputs)
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
+ import re
 
 
5
 
6
  # Load model
7
  MODEL = "roberta-base-openai-detector"
 
9
  model = AutoModelForSequenceClassification.from_pretrained(MODEL)
10
 
11
  def detect_ai(text):
12
+ # Split into rough sentences
13
+ sentences = re.split(r'(?<=[.!?]) +', text)
14
  results = []
15
 
16
  for sent in sentences:
17
+ if not sent.strip():
18
+ continue
19
  inputs = tokenizer(sent, return_tensors="pt", truncation=True, max_length=512)
20
  with torch.no_grad():
21
  outputs = model(**inputs)