FedorX8 committed on
Commit
684fbaf
·
verified ·
1 Parent(s): 431c9c0

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline, AutoTokenizer
3
+ import pandas as pd
4
+
5
@st.cache_resource
def load_model():
    """Build the text-classification pipeline used by the app.

    The tokenizer and the model are both loaded from the same checkpoint.
    The function is wrapped in ``st.cache_resource``.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``'text-classification'`` task, created with ``return_all_scores=True``.
    """
    checkpoint = 'FedorX8/arxiv-classification-bert-uncased'
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    classifier = pipeline(
        task='text-classification',
        model=checkpoint,
        tokenizer=tokenizer,
        return_all_scores=True,
    )
    return classifier
9
+
10
def get_top_p(result, top_p=0.95):
    """Return the top-scoring labels whose cumulative score reaches ``top_p``.

    Entries are ranked by descending ``'score'`` and collected one by one for
    as long as the running total of the collected scores is still below
    ``top_p``. The entry that makes the total reach the threshold is included.

    Args:
        result: Iterable of mappings, each providing a ``'label'`` and a
            ``'score'`` key. It is not modified.
        top_p: Cumulative-score threshold at which collection stops.

    Returns:
        A ``(classes, probs)`` tuple of two parallel lists: the selected
        labels and their scores, ordered from highest to lowest score.
    """
    ranked = sorted(result, key=lambda item: item['score'], reverse=True)
    prob_sum = 0
    classes = []
    probs = []
    for elem in ranked:
        # The running total changes only when an entry is kept, so once the
        # threshold is reached no later entry can qualify: stop scanning.
        if prob_sum >= top_p:
            break
        score = elem['score']
        prob_sum += score
        probs.append(score)
        classes.append(elem['label'])
    return classes, probs
23
+
24
+
25
# Obtain the classification pipeline (load_model is wrapped in st.cache_resource).
st_model = load_model()

st.header('Web interface for arXiv articles classification')

# Text shown inside the expandable section: the list of possible classes.
CLASS_DESCRIPTIONS = """
1. math.AC — Commutative Algebra
2. cs.CV — Computer Vision and Pattern Recognition
3. cs.AI — Artificial Intelligence
4. cs.SY — Systems and Control
5. math.GR — Group Theory
6. cs.CE — Computational Engineering, Finance, and Science
7. cs.PL — Programming Languages
8. cs.IT — Information Theory
9. cs.DS — Data Structures and Algorithms
10. cs.NE — Neural and Evolutionary Computing
11. math.ST — Statistics Theory
"""

# Create the expandable text section and fill it with the class list.
expander = st.expander("Click to read description of possible classes")
expander.markdown(CLASS_DESCRIPTIONS)
44
+
45
# Free-text input for the paper; pre-filled with "AI" so a result is shown
# on first load.
query = st.text_input("Enter the text of the paper", value="AI")
if query:
    result = st_model(query)
    # result[0] is handed to get_top_p as the list of label/score entries
    # for the single text that was submitted.
    classes, probs = get_top_p(result[0])
    # Show the selected classes and their scores as a two-column table.
    df = pd.DataFrame({
        "classes": classes,
        "probabilities": probs,
    })
    st.write(df)