Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
import torch
|
| 4 |
import torch.nn.functional as F
|
|
|
|
| 5 |
|
| 6 |
LABELS = ["astro-ph", "cond-mat", "cs", "econ", "eess", "gr-qc",
|
| 7 |
"hep-ex", "hep-lat", "hep-ph", "hep-th", "math", "math-ph",
|
|
@@ -24,6 +25,10 @@ def load_model():
|
|
| 24 |
)
|
| 25 |
model.eval()
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def predict(title: str, abstract: str):
|
| 29 |
if not title.strip():
|
|
@@ -31,6 +36,9 @@ def predict(title: str, abstract: str):
|
|
| 31 |
|
| 32 |
load_model()
|
| 33 |
|
|
|
|
|
|
|
|
|
|
| 34 |
text = f"{title} [SEP] {abstract}" if abstract.strip() else title
|
| 35 |
inputs = tokenizer(
|
| 36 |
text,
|
|
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
import torch
|
| 4 |
import torch.nn.functional as F
|
| 5 |
+
import re
|
| 6 |
|
| 7 |
LABELS = ["astro-ph", "cond-mat", "cs", "econ", "eess", "gr-qc",
|
| 8 |
"hep-ex", "hep-lat", "hep-ph", "hep-th", "math", "math-ph",
|
|
|
|
| 25 |
)
|
| 26 |
model.eval()
|
| 27 |
|
| 28 |
+
def clean_text(text: str) -> str:
    """Normalize whitespace in *text*.

    Every run of whitespace characters (spaces, tabs, newlines, ...) is
    collapsed into a single space, and leading/trailing whitespace is
    removed.

    Args:
        text: The raw string to normalize.

    Returns:
        The string with whitespace runs collapsed and the ends stripped.
    """
    # `\s+` already matches newlines, so a separate newline-to-space
    # replacement beforehand is unnecessary.
    return re.sub(r"\s+", " ", text).strip()
|
| 32 |
|
| 33 |
def predict(title: str, abstract: str):
|
| 34 |
if not title.strip():
|
|
|
|
| 36 |
|
| 37 |
load_model()
|
| 38 |
|
| 39 |
+
title = clean_text(title)
|
| 40 |
+
abstract = clean_text(abstract)
|
| 41 |
+
|
| 42 |
text = f"{title} [SEP] {abstract}" if abstract.strip() else title
|
| 43 |
inputs = tokenizer(
|
| 44 |
text,
|