import streamlit as st
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re
from nltk.tokenize import RegexpTokenizer
from bs4 import BeautifulSoup as bs
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
nltk.download('punkt')
import pandas as pd

# Hugging Face Hub identifier of the classification checkpoint; replace it
# with the path to your own model if needed.
model_name = 'Yerzhxn/class_vac'

# Pick the compute device first: GPU when CUDA is available, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the tokenizer and the model, then move the model to the chosen device.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model.to(device)

def preprocess(sentence):
    """Normalize raw text before it is passed to the classifier.

    Steps, in order: strip HTML markup (two BeautifulSoup passes followed by
    a regex sweep for leftover tags), lowercase, drop the literal ``{html}``
    marker, remove URLs and digit runs, split into word-character tokens and
    discard Russian stop words.

    Args:
        sentence: Raw input text, possibly containing HTML.

    Returns:
        The remaining tokens joined by single spaces; an empty string when
        no token survives the filtering.
    """
    # Two parsing passes: get_text() decodes HTML entities, so markup that
    # arrived escaped (e.g. "&lt;b&gt;") turns into a real tag after the first
    # pass and is removed by the second one.
    for _ in range(2):
        sentence = bs(sentence, features="html.parser").get_text()

    text = sentence.lower().replace('{html}', "")
    text = re.sub(r'<.*?>', '', text)     # any tag-like remnants
    text = re.sub(r'http\S+', '', text)   # URLs
    text = re.sub(r'[0-9]+', '', text)    # digit runs

    # Named `word_tokenizer` so it does not shadow the module-level
    # Hugging Face `tokenizer`.
    word_tokenizer = RegexpTokenizer(r'\w+')
    words = word_tokenizer.tokenize(text)

    # Build the stop-word set once per call. Calling stopwords.words() inside
    # the filter would reload the list and scan it linearly for every token.
    russian_stopwords = set(stopwords.words('russian'))

    return " ".join(w for w in words if w not in russian_stopwords)

# Streamlit interface
st.title("Тестирование классификации текста")
st.write("Введите текст, чтобы узнать предсказанный класс.")

# Text input field
input_text = st.text_area("Введите текст здесь", "")
# Lookup table; the code below reads its 'label' and 'PROF_NAME' columns.
df = pd.read_excel('me.xlsx')

if st.button("Предсказать"):
    # Whitespace-only input is treated the same as empty input.
    if input_text.strip():
        input_text = preprocess(input_text)
        # Convert the text into the tensor format expected by the model.
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
        inputs = {key: value.to(device) for key, value in inputs.items()}

        # Run the model without tracking gradients (inference only).
        with torch.no_grad():
            outputs = model(**inputs)

        # Turn the logits into class probabilities.
        logits = outputs.logits
        probabilities = F.softmax(logits, dim=1)

        # Most probable class and its probability.
        max_prob, predicted_class = torch.max(probabilities, dim=1)
        confidence = max_prob.item()
        label = predicted_class.item()

        # Only report the prediction when it clears the confidence threshold.
        if confidence > 0.35:
            st.write(f"Предсказанный класс: {label}, вероятность: {confidence:.2f}")
            matches = df.loc[df['label'] == label, 'PROF_NAME']
            # Guard against labels missing from the spreadsheet; indexing an
            # empty selection with iloc[0] would raise IndexError.
            if matches.empty:
                st.write("Для предсказанного класса нет записи в справочнике.")
            else:
                st.write(matches.iloc[0])
        else:
            st.write("Модель не уверена в предсказании (вероятность меньше 35%).")
    else:
        # Shown only when the button was pressed with no text entered.
        # Previously this branch was attached to the button check itself, so
        # the message appeared on every render where the button was not pressed.
        st.write("Пожалуйста, введите текст для классификации.")