# Hugging Face Spaces app — Disability Insensitive Language Detection (Streamlit)
# --- Dependencies -----------------------------------------------------------
import nltk
import streamlit as st
import torch
from transformers import BertForSequenceClassification, BertTokenizer

# Sentence-tokenizer data must be present before sent_tokenize is called.
# quiet=True keeps the download log out of the app output; 'punkt_tab' is the
# resource newer nltk releases (>= 3.8.2) require for sent_tokenize.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
from nltk.tokenize import sent_tokenize
# Load model and tokenizer once per server process.
# Streamlit re-executes this script top-to-bottom on every widget interaction;
# without caching, the BERT weights would be re-loaded on every click.
# @st.cache_resource keeps the (model, tokenizer) pair alive across reruns.
@st.cache_resource
def load_model():
    """Return the (model, tokenizer) pair for the insensitive-language classifier.

    Both are pulled from the Hugging Face Hub repo
    "rrroby/insensitive-language-bert"; from_pretrained leaves the model
    in eval mode, so no explicit model.eval() is needed for inference.
    """
    model = BertForSequenceClassification.from_pretrained("rrroby/insensitive-language-bert")
    tokenizer = BertTokenizer.from_pretrained("rrroby/insensitive-language-bert")
    return model, tokenizer

model, tokenizer = load_model()
# ---- Page header, instructions, and input widget ----
st.title("Disability Insensitive Language Detection V1.2")

# Instruction text shown under the title (kept as one named constant so the
# copy is easy to locate and edit).
_INSTRUCTIONS = """
Paste your abstract or academic text below.
Each sentence will be analyzed and flagged if any disability-insensitive language is detected.\n
NOTE: The current model was trained on very little data and is still in the early stages, therefore, it is prone to inaccuracies.
"""
st.write(_INSTRUCTIONS)

text = st.text_area("Enter text here:", height=250)
def _score_sentences(sentences):
    """Classify a batch of sentences with the module-level model/tokenizer.

    Returns (probs, pred_classes):
      probs        — softmax probabilities, shape (n_sentences, 2);
                     column 0 = "not insensitive", column 1 = "insensitive"
                     (column meaning is fixed by how the caller reports them).
      pred_classes — per-sentence argmax class index over those columns.
    Inputs are padded/truncated to BERT's 512-token limit.
    """
    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():  # inference only — no gradient bookkeeping
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return probs, torch.argmax(probs, dim=-1)


if st.button("Analyze"):
    # Idiomatic emptiness check: also rejects whitespace-only input.
    if not text.strip():
        st.warning("Some text required for analysis")
    else:
        sentences = sent_tokenize(text)
        with st.spinner("Analyzing..."):
            probs, pred_classes = _score_sentences(sentences)
        # Report each sentence with its predicted label and the model's
        # confidence in both classes.
        for idx, sentence in enumerate(sentences):
            prob_not_insensitive = probs[idx][0].item() * 100
            prob_insensitive = probs[idx][1].item() * 100
            if pred_classes[idx] == 1:
                st.error(f"**Insensitive:** {sentence}")
            else:
                st.success(f"**Not insensitive:** {sentence}")
            st.caption(f"Model's Confidence — Not insensitive: {prob_not_insensitive:.2f}%, Insensitive: {prob_insensitive:.2f}%")