Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import numpy as np | |
@st.cache_resource
def load_model():
    """Load the sequence-classification model and tokenizer from ``arxiv_model``.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource``: the checkpoint is read
    once per server process and the same objects are handed back on later
    reruns instead of being reloaded each time.

    Returns:
        A ``(model, tokenizer)`` tuple. Both are loaded with
        ``local_files_only=True``, so nothing is downloaded at startup.
    """
    # NOTE(review): trust_remote_code=True lets the checkpoint execute its own
    # Python modelling code on load; keep it only while "arxiv_model" comes
    # from a source you control.
    model = AutoModelForSequenceClassification.from_pretrained(
        "arxiv_model",
        trust_remote_code=True,
        local_files_only=True,
    )
    tokenizer = AutoTokenizer.from_pretrained("arxiv_model", local_files_only=True)
    return model, tokenizer
model, tokenizer = load_model()
id2label = model.config.id2label

# Share of total probability mass the displayed category list has to cover.
CUMULATIVE_THRESHOLD = 0.95


def _select_top_labels(probs, labels, threshold=CUMULATIVE_THRESHOLD):
    """Return the most probable ``(label, prob)`` pairs covering *threshold*.

    Classes are visited from most to least probable and collected until their
    summed probability reaches *threshold*; the class that crosses the
    threshold is included in the result.

    Args:
        probs: 1-D array of class probabilities.
        labels: Mapping from integer class index to label name.
        threshold: Cumulative probability at which collection stops.

    Returns:
        List of ``(label, probability)`` tuples, most probable first.
    """
    selected = []
    cumulative = 0.0
    for idx in np.argsort(probs)[::-1]:
        prob = float(probs[idx])
        # int() turns the NumPy index into a plain int for the mapping lookup.
        selected.append((labels[int(idx)], prob))
        cumulative += prob
        if cumulative >= threshold:
            break
    return selected


st.title("🔬 ArXiv Article Classifier")
st.markdown("Введите **название** и (по желанию) **аннотацию** статьи. Сервис предскажет вероятные темы!")
title_input = st.text_input("Название статьи")
abstract_input = st.text_area("Аннотация (необязательно)")

classify_clicked = st.button("Классифицировать")
# Strip before testing: a whitespace-only title must count as "no title",
# otherwise the model would be run on an empty string.
title = title_input.strip()
abstract = abstract_input.strip()

if classify_clicked and title:
    # The abstract is optional; when present it is appended after the title.
    text = f"{title} {abstract}" if abstract else title
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
        # Single input text, so row 0 holds the only probability vector.
        probs = torch.nn.functional.softmax(outputs.logits, dim=1).numpy()[0]

    top95 = _select_top_labels(probs, id2label)
    # The helper always yields at least the single most probable class.
    best_label, best_prob = top95[0]
    st.markdown(f"### 🎯 Основная тема: `{best_label}` ({best_prob*100:.2f}%)")
    st.markdown("### 📋 Категории (до 95% суммарной вероятности):")
    for label, prob in top95:
        st.write(f"- `{label}`: {prob*100:.2f}%")
else:
    st.markdown("_Введите название статьи и нажмите кнопку выше_")