FedorX8 committed on
Commit
431c9c0
·
verified ·
1 Parent(s): aa53572

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +54 -0
  2. requirements.txt +51 -0
main.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline, AutoTokenizer
3
+ import pandas as pd
4
+
5
@st.cache_resource
def load_model():
    """Build the text-classification pipeline used by the app.

    The function is wrapped in ``st.cache_resource`` and takes no
    arguments. It loads the tokenizer for the
    ``FedorX8/arxiv-classification-bert-uncased`` checkpoint and returns a
    ``transformers`` pipeline for the same checkpoint, configured to return
    the scores of all labels.
    """
    checkpoint = 'FedorX8/arxiv-classification-bert-uncased'
    bert_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # NOTE(review): `return_all_scores` is reported as deprecated by recent
    # transformers releases in favour of `top_k=None` — confirm the output
    # shape expected by the caller before migrating.
    classifier = pipeline(
        task='text-classification',
        model=checkpoint,
        tokenizer=bert_tokenizer,
        return_all_scores=True,
    )
    return classifier
9
+
10
def get_top_p(result, top_p=0.95):
    """Return the highest-scoring labels whose cumulative score reaches ``top_p``.

    Entries are ranked by descending ``'score'`` and collected one by one
    until the running sum of the collected scores is no longer below
    ``top_p``. The entry that crosses the threshold is included.

    Args:
        result: Iterable of mappings, each with a ``'label'`` and a
            ``'score'`` key. The input is not modified.
        top_p: Cumulative score threshold. A value of ``0`` or less yields
            empty results.

    Returns:
        A ``(classes, probs)`` tuple of two parallel lists: the selected
        labels and their scores, in descending score order.
    """
    ranked = sorted(result, key=lambda item: item['score'], reverse=True)
    classes = []
    probs = []
    prob_sum = 0
    for elem in ranked:
        # Once the kept entries already cover `top_p`, nothing further can
        # be selected, so stop instead of scanning the remaining entries.
        if prob_sum >= top_p:
            break
        score = elem['score']
        prob_sum += score
        probs.append(score)
        classes.append(elem['label'])
    return classes, probs
23
+
24
+
25
# Streamlit page body: load the classifier, render the static description,
# then classify whatever text the user typed.
st_model = load_model()

st.header('Web interface for arXiv articles classification')

# Collapsible description of the possible classes.
expander = st.expander("Click to read description of possible classes")
expander.markdown("""
1. math.AC — Commutative Algebra
2. cs.CV — Computer Vision and Pattern Recognition
3. cs.AI — Artificial Intelligence
4. cs.SY — Systems and Control
5. math.GR — Group Theory
6. cs.CE — Computational Engineering, Finance, and Science
7. cs.PL — Programming Languages
8. cs.IT — Information Theory
9. cs.DS — Data Structures and Algorithms
10. cs.NE — Neural and Evolutionary Computing
11. math.ST — Statistics Theory
""")

query = st.text_input("Enter the text of the paper", value="AI")
# Skip inference when the field is empty or contains only whitespace.
if query.strip():
    result = st_model(query)
    # The first element of the pipeline output is handed to get_top_p as the
    # list of label/score entries for this query.
    classes, probs = get_top_p(result[0])
    data_dict = {
        "classes": classes,
        "probabilities": probs,
    }
    df = pd.DataFrame(data_dict)
    st.write(df)
requirements.txt ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.6.0
2
+ altair==5.5.0
3
+ attrs==25.3.0
4
+ blinker==1.9.0
5
+ cachetools==5.5.2
6
+ certifi==2025.1.31
7
+ charset-normalizer==3.4.1
8
+ click==8.1.8
9
+ filelock==3.18.0
10
+ fsspec==2025.3.2
11
+ gitdb==4.0.12
12
+ GitPython==3.1.44
13
+ huggingface-hub==0.30.1
14
+ idna==3.10
15
+ Jinja2==3.1.6
16
+ jsonschema==4.23.0
17
+ jsonschema-specifications==2024.10.1
18
+ MarkupSafe==3.0.2
19
+ mpmath==1.3.0
20
+ narwhals==1.33.0
21
+ networkx==3.4.2
22
+ numpy==2.2.4
23
+ packaging==24.2
24
+ pandas==2.2.3
25
+ pillow==11.1.0
26
+ protobuf==5.29.4
27
+ psutil==7.0.0
28
+ pyarrow==19.0.1
29
+ pydeck==0.9.1
30
+ python-dateutil==2.9.0.post0
31
+ pytz==2025.2
32
+ PyYAML==6.0.2
33
+ referencing==0.36.2
34
+ regex==2024.11.6
35
+ requests==2.32.3
36
+ rpds-py==0.24.0
37
+ safetensors==0.5.3
38
+ six==1.17.0
39
+ smmap==5.0.2
40
+ streamlit==1.44.1
41
+ sympy==1.13.1
42
+ tenacity==9.1.2
43
+ tokenizers==0.21.1
44
+ toml==0.10.2
45
+ torch==2.6.0
46
+ tornado==6.4.2
47
+ tqdm==4.67.1
48
+ transformers==4.51.0
49
+ typing_extensions==4.13.1
50
+ tzdata==2025.2
51
+ urllib3==2.3.0