Spaces:
Runtime error
Runtime error
File size: 1,724 Bytes
684fbaf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import streamlit as st
from transformers import pipeline, AutoTokenizer
import pandas as pd
@st.cache_resource
def load_model():
    """Build the arXiv text-classification pipeline.

    The tokenizer and the model weights are both pulled from the same
    checkpoint. The function is wrapped in ``st.cache_resource`` so the
    pipeline object is reused across Streamlit reruns rather than being
    rebuilt on every interaction.

    Returns:
        A ``transformers`` text-classification pipeline configured to
        return a score for every label, not just the best one.
    """
    checkpoint = 'FedorX8/arxiv-classification-bert-uncased'
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # NOTE(review): `return_all_scores` is reported as deprecated in recent
    # transformers releases in favour of `top_k=None` -- confirm against the
    # pinned version before changing it.
    classifier = pipeline(
        task='text-classification',
        model=checkpoint,
        tokenizer=tokenizer,
        return_all_scores=True,
    )
    return classifier
def get_top_p(result, top_p=0.95):
    """Select the most probable labels whose cumulative score reaches ``top_p``.

    Entries are ranked by descending ``'score'`` and collected one by one
    until the running total is no longer below ``top_p``. The entry that
    pushes the total to or past the threshold is included.

    Args:
        result: Iterable of mappings, each with a ``'label'`` and a
            ``'score'`` key.
        top_p: Cumulative score threshold.

    Returns:
        A ``(classes, probs)`` tuple of two parallel lists: the selected
        labels and their scores, highest score first. Both lists are empty
        when ``result`` is empty or ``top_p`` is not positive.
    """
    ranked = sorted(result, key=lambda x: x['score'], reverse=True)
    prob_sum = 0
    classes = []
    probs = []
    for elem in ranked:
        if prob_sum < top_p:
            prob_sum += elem['score']
            probs.append(elem['score'])
            classes.append(elem['label'])
        else:
            # The total only changes inside the branch above, so once the
            # threshold is met no later entry can be selected.
            break
    return classes, probs
# Obtain the classification pipeline (load_model is cached by Streamlit).
st_model = load_model()

st.header('Web interface for arXiv articles classification')

# Collapsible section describing the classes shown in the results.
expander = st.expander("Click to read description of possible classes")
expander.markdown("""
1. math.AC — Commutative Algebra
2. cs.CV — Computer Vision and Pattern Recognition
3. cs.AI — Artificial Intelligence
4. cs.SY — Systems and Control
5. math.GR — Group Theory
6. cs.CE — Computational Engineering, Finance, and Science
7. cs.PL — Programming Languages
8. cs.IT — Information Theory
9. cs.DS — Data Structures and Algorithms
10. cs.NE — Neural and Evolutionary Computing
11. math.ST — Statistics Theory
""")

query = st.text_input("Enter the text of the paper", value="AI")
if query:
    # Only classify when the input box is non-empty.
    result = st_model(query)
    # result[0] is used as the per-label score list for the single query.
    classes, probs = get_top_p(result[0])
    df = pd.DataFrame({
        "classes": classes,
        "probabilities": probs,
    })
    st.write(df)