```python
# toxic.py
import time

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Make sure the model and tokenizer paths are correct and accessible to the
# Streamlit app. Since this module is imported from another file, relative or
# absolute paths may need to be adjusted accordingly.
model_t_checkpoint = 'cointegrated/rubert-tiny-toxicity'
tokenizer_t = AutoTokenizer.from_pretrained(model_t_checkpoint)
model_t = AutoModelForSequenceClassification.from_pretrained(model_t_checkpoint)
model_t.eval()  # inference only


def text2toxicity(text, aggregate=True):
    """Return the toxicity of a string (or list of strings) as values in [0, 1]."""
    with torch.no_grad():
        inputs = tokenizer_t(text, return_tensors='pt', truncation=True, padding=True).to('cpu')
        proba = torch.sigmoid(model_t(**inputs).logits).cpu().numpy()
    if isinstance(text, str):
        proba = proba[0]
    if aggregate:
        # Collapse the per-label probabilities into one score:
        # 1 - P(non-toxic) * (1 - P(dangerous)), i.e. the probability that the
        # text is toxic or dangerous (column 0 is "non-toxic", the last column
        # is "dangerous").
        return 1 - proba.T[0] * (1 - proba.T[-1])
    return proba


def toxicity_page():
    st.title('Determine whether a comment is toxic or not')
    user_text_input = st.text_area('Enter your review here:')
    if st.button('Predict'):
        start_time = time.time()
        proba = text2toxicity(user_text_input, True)
        prediction_time = time.time() - start_time
        if proba >= 0.5:
            st.write(f'Toxicity score of the comment: {proba:.2f} – the comment is toxic.')
        else:
            st.write(f'Toxicity score of the comment: {proba:.2f} – the comment is not toxic.')
        st.write(f'Prediction time: {prediction_time:.4f} seconds')
```
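For intuition on the aggregate score: if P(non-toxic) = 0.9 and P(dangerous) = 0.1, the aggregate is 1 - 0.9 * (1 - 0.1) = 0.19. A quick sanity check outside Streamlit looks like this (the sample strings below are illustrative, not from the original app):

```python
# Minimal sanity-check sketch; sample inputs are hypothetical.
score = text2toxicity('какой хороший день!')  # single string -> one aggregate score
print(f'aggregate toxicity: {score:.2f}')

# aggregate=False returns the raw per-label probabilities instead.
proba = text2toxicity(['текст один', 'текст два'], aggregate=False)
print(proba.shape)  # (2, number_of_labels)
```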
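One caveat: Streamlit re-executes scripts on interaction, so depending on how this module is loaded the model may be constructed more than once. A common pattern, sketched below assuming Streamlit ≥ 1.18 (where `st.cache_resource` exists; `load_toxicity_model` is a hypothetical helper name, not part of the original app), is to wrap loading in a cached function:

```python
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification

@st.cache_resource  # load once per process, reuse across reruns and sessions
def load_toxicity_model(checkpoint='cointegrated/rubert-tiny-toxicity'):
    # Hypothetical helper: returns the tokenizer/model pair used above.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    model.eval()
    return tokenizer, model

tokenizer_t, model_t = load_toxicity_model()
```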