rrroby committed on
Commit
d3fdf1d
·
verified ·
1 Parent(s): 165fadc

Update modelV1app.py

Browse files
Files changed (1) hide show
  1. modelV1app.py +53 -53
modelV1app.py CHANGED
@@ -1,53 +1,53 @@
1
- import streamlit as st
2
- from transformers import BertForSequenceClassification, BertTokenizer
3
- import torch
4
-
5
- # Load model and tokenizer from Hugging Face
6
- @st.cache_resource()
7
- def load_model():
8
- model = BertForSequenceClassification.from_pretrained("rrroby/insensitive-language-bert")
9
- tokenizer = BertTokenizer.from_pretrained("rrroby/insensitive-language-bert")
10
- return model, tokenizer
11
-
12
- model, tokenizer = load_model()
13
-
14
- # App title and description
15
- st.title("Disability Insensitive Language Detection V1.2")
16
- st.write(
17
- """
18
- Paste your abstract or academic text below.
19
- It will be analyzed and flagged if any disability-insensitive language is detected.\n
20
- NOTE: The current model was trained on very little data and is still in the early stages, therefore, it is prone to inaccuracies.
21
- """
22
- )
23
-
24
- # User input box
25
- text = st.text_area("Enter text here:", height=300)
26
-
27
- # Analyze button
28
- if st.button("Analyze"):
29
- if text.strip() == "":
30
- st.warning("Some text required for analysis")
31
- else:
32
- # Split text into sentences to handle multiple sentences
33
- sentences = [s.strip() for s in text.split(".") if s.strip()]
34
-
35
- # Tokenize as batch
36
- inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)
37
-
38
- # Predict current set of sentences
39
- with torch.no_grad():
40
- outputs = model(**inputs)
41
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
42
- pred_classes = torch.argmax(probs, dim=-1)
43
-
44
- for idx, sentence in enumerate(sentences):
45
- prob_not_insensitive = probs[idx][0].item() * 100
46
- prob_insensitive = probs[idx][1].item() * 100
47
-
48
- if pred_classes[idx] == 1:
49
- st.error(f"**Insensitive:** {sentence}")
50
- else:
51
- st.success(f"**Not insensitive:** {sentence}")
52
-
53
- st.caption(f"Model's Confidence — Not insensitive: {prob_not_insensitive:.2f}%, Insensitive: {prob_insensitive:.2f}%")
 
1
+ import streamlit as st
2
+ from transformers import BertForSequenceClassification, BertTokenizer
3
+ import torch
4
+ import nltk
5
+
6
+ # Download sentence tokenizer data
7
+ nltk.download('punkt')
8
+ from nltk.tokenize import sent_tokenize
9
+
10
+ # Load model and tokenizer
11
+ @st.cache_resource()
12
+ def load_model():
13
+ model = BertForSequenceClassification.from_pretrained("rrroby/insensitive-language-bert")
14
+ tokenizer = BertTokenizer.from_pretrained("rrroby/insensitive-language-bert")
15
+ return model, tokenizer
16
+
17
+ model, tokenizer = load_model()
18
+
19
+ # Page title and instructions
20
+ st.title("Disability Insensitive Language Detection V1.2")
21
+ st.write(
22
+ """
23
+ Paste your abstract or academic text below.
24
+ Each sentence will be analyzed and flagged if any disability-insensitive language is detected.\n
25
+ NOTE: The current model was trained on very little data and is still in the early stages, therefore, it is prone to inaccuracies.
26
+ """
27
+ )
28
+
29
+ text = st.text_area("Enter text here:", height=250)
30
+
31
+ if st.button("Analyze"):
32
+ if text.strip() == "":
33
+ st.warning("Some text required for analysis")
34
+ else:
35
+ sentences = sent_tokenize(text)
36
+
37
+ with st.spinner("Analyzing..."):
38
+ inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)
39
+ with torch.no_grad():
40
+ outputs = model(**inputs)
41
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
42
+ pred_classes = torch.argmax(probs, dim=-1)
43
+
44
+ for idx, sentence in enumerate(sentences):
45
+ prob_not_insensitive = probs[idx][0].item() * 100
46
+ prob_insensitive = probs[idx][1].item() * 100
47
+
48
+ if pred_classes[idx] == 1:
49
+ st.error(f"**Insensitive:** {sentence}")
50
+ else:
51
+ st.success(f"**Not insensitive:** {sentence}")
52
+
53
+ st.caption(f"Model's Confidence — Not insensitive: {prob_not_insensitive:.2f}%, Insensitive: {prob_insensitive:.2f}%")