TochkaMikelya committed on
Commit
a3b0757
·
verified ·
1 Parent(s): 89e0dae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -0
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  import torch.nn.functional as F
 
5
 
6
  LABELS = ["astro-ph", "cond-mat", "cs", "econ", "eess", "gr-qc",
7
  "hep-ex", "hep-lat", "hep-ph", "hep-th", "math", "math-ph",
@@ -24,6 +25,10 @@ def load_model():
24
  )
25
  model.eval()
26
 
 
 
 
 
27
 
28
  def predict(title: str, abstract: str):
29
  if not title.strip():
@@ -31,6 +36,9 @@ def predict(title: str, abstract: str):
31
 
32
  load_model()
33
 
 
 
 
34
  text = f"{title} [SEP] {abstract}" if abstract.strip() else title
35
  inputs = tokenizer(
36
  text,
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  import torch.nn.functional as F
5
+ import re
6
 
7
  LABELS = ["astro-ph", "cond-mat", "cs", "econ", "eess", "gr-qc",
8
  "hep-ex", "hep-lat", "hep-ph", "hep-th", "math", "math-ph",
 
25
  )
26
  model.eval()
27
 
28
+ def clean_text(text):
29
+ text = text.replace('\n', ' ')
30
+ text = re.sub(r'\s+', ' ', text)
31
+ return text.strip()
32
 
33
  def predict(title: str, abstract: str):
34
  if not title.strip():
 
36
 
37
  load_model()
38
 
39
+ title = clean_text(title)
40
+ abstract = clean_text(abstract)
41
+
42
  text = f"{title} [SEP] {abstract}" if abstract.strip() else title
43
  inputs = tokenizer(
44
  text,