import streamlit as st
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import nltk

# Download sentence tokenizer data.
# Older NLTK releases load the "punkt" resource in sent_tokenize, while recent
# ones (3.9+) load "punkt_tab" instead; make sure both are available so the
# app works regardless of the installed NLTK version.
# Streamlit re-executes this script on every interaction, so look for the data
# locally first and only contact the download server when it is missing.
for _resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_resource}")
    except LookupError:
        nltk.download(_resource)
from nltk.tokenize import sent_tokenize

# Load model and tokenizer
@st.cache_resource()
def load_model():
    """Load the BERT sequence classifier and its tokenizer.

    Both objects come from the "rrroby/insensitive-language-bert" checkpoint.
    The function is wrapped in ``st.cache_resource`` so the loaded pair is
    reused across script reruns instead of being re-created each time.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = "rrroby/insensitive-language-bert"
    # Model first, tokenizer second: callers unpack the tuple in this order.
    return (
        BertForSequenceClassification.from_pretrained(checkpoint),
        BertTokenizer.from_pretrained(checkpoint),
    )

# Obtain the classifier and tokenizer used by the analysis step below.
model, tokenizer = load_model()

# Page title and instructions
# The instructions are kept in a named literal so the page layout calls stay
# short; the embedded "\n" plus the line break yields a blank line before NOTE.
_instructions = """
    Paste your abstract or academic text below.
    Each sentence will be analyzed and flagged if any disability-insensitive language is detected.\n
    NOTE: The current model was trained on very little data and is still in the early stages, therefore, it is prone to inaccuracies.
    """

st.title("Disability Insensitive Language Detection V1.2")
st.write(_instructions)

# Free-text input; its content is split into sentences when "Analyze" is pressed.
text = st.text_area("Enter text here:", height=250)

if st.button("Analyze"):
    if not text.strip():
        # Nothing but whitespace was entered, so there is nothing to classify.
        st.warning("Some text required for analysis")
    else:
        sentences = sent_tokenize(text)

        with st.spinner("Analyzing..."):
            # All sentences are encoded and classified together as one batch.
            encoded = tokenizer(
                sentences,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )
            # Inference only: no gradients are needed.
            with torch.no_grad():
                logits = model(**encoded).logits
                probs = torch.softmax(logits, dim=-1)
                pred_classes = probs.argmax(dim=-1)

        # One result box plus a confidence caption per sentence, in input order.
        for sentence, label, row in zip(sentences, pred_classes.tolist(), probs.tolist()):
            # Index 0 is reported as "Not insensitive", index 1 as "Insensitive".
            prob_not_insensitive = row[0] * 100
            prob_insensitive = row[1] * 100

            if label == 1:
                st.error(f"**Insensitive:** {sentence}")
            else:
                st.success(f"**Not insensitive:** {sentence}")

            st.caption(f"Model's Confidence — Not insensitive: {prob_not_insensitive:.2f}%, Insensitive: {prob_insensitive:.2f}%")