import streamlit as st
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

title = "Model Exploration"
description = "Comparison of hate speech detection models"
date = "2022-01-26"
thumbnail = "images/huggingface_logo.png"


# Creates the forms for receiving multiple inputs to compare for a single
# model or one input to compare for two models
def run_article():
    st.markdown("""
# Making a Hate Speech Detection Model

Once the data has been collected using the definitions identified for the
task, you can start training your model. During training, the model takes in
the labeled data and learns the context associated with each label in the
input text. Depending on the task design, the labels may be binary, like
'hateful' and 'non-hateful', or multiclass, like 'neutral', 'offensive', and
'attack'.

When presented with a new input string, the model predicts the likelihood
that the input belongs to each of the available labels, and returns the label
with the highest likelihood along with a confidence score from 0 to 1.

Neural models such as transformers are frequently trained as general language
models and then fine-tuned on specific classification tasks. These models can
vary in their architectures and optimization algorithms, sometimes producing
very different outputs for the same input text.

# Model Output Ranking

For now, here's a link to the [space](https://huggingface.co/spaces/aymm/ModelOutputRankingTool).""")
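    # For reference, a transformers text-classification pipeline returns a
    # list with one dict per call, holding the top label and its score.
    # Illustrative shape only; actual label strings depend on the model:
    #   classifier = pipeline("text-classification",
    #                         model="cardiffnlp/twitter-roberta-base-hate")
    #   classifier("Good morning.")
    #   # -> [{'label': 'NOT-HATE', 'score': 0.97}]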
    with st.expander("Model Output Ranking Tool", expanded=False):
        with st.form(key='ranking'):
            model_name = st.selectbox("Select a model to test",
                                      ["classla/roberta-base-frenk-hate",
                                       "cardiffnlp/twitter-roberta-base-hate",
                                       "Hate-speech-CNERG/dehatebert-mono-english"],
                                      key="rank_model_select"
                                      )
            # The placeholder keyword was added in v1.2 of Streamlit, and the
            # versions available on Spaces currently only go up to v1.0
            input_1 = st.text_input("Input 1",
                                    # placeholder="We shouldn't let [IDENTITY] suffer.",
                                    help="Try a phrase like 'We shouldn't let [IDENTITY] suffer.'",
                                    key="rank_input_1")
            input_2 = st.text_input("Input 2",
                                    # placeholder="I'd rather die than date [IDENTITY].",
                                    help="Try a phrase like 'I'd rather die than date [IDENTITY].'",
                                    key="rank_input_2")
            input_3 = st.text_input("Input 3",
                                    # placeholder="Good morning.",
                                    help="Try a phrase like 'Good morning.'",
                                    key="rank_input_3")
            inputs = [input_1, input_2, input_3]
            if st.form_submit_button(label="Rank inputs"):
                results = run_ranked(model_name, inputs)
                st.dataframe(results)
    st.markdown("""
# Model Comparison

For now, here's a link to the [space](https://huggingface.co/spaces/aymm/ModelComparisonTool).
""")
    with st.expander("Model Comparison Tool", expanded=False):
        with st.form(key='comparison'):
            model_name_1 = st.selectbox("Select a model to compare",
                                        ["cardiffnlp/twitter-roberta-base-hate",
                                         "Hate-speech-CNERG/dehatebert-mono-english",
                                         ],
                                        key='compare_model_1'
                                        )
            model_name_2 = st.selectbox("Select another model to compare",
                                        ["cardiffnlp/twitter-roberta-base-hate",
                                         "Hate-speech-CNERG/dehatebert-mono-english",
                                         ],
                                        key='compare_model_2'
                                        )
            input_text = st.text_input("Comparison input",
                                       key="compare_input")
            if st.form_submit_button(label="Compare models"):
                results = run_compare(model_name_1, model_name_2, input_text)
                st.dataframe(results)


# Runs the received input strings through the given model and returns the
# outputs grouped by label, with each group sorted by score, highest first
# (does not assume binary labels)
def run_ranked(model, input_list):
    classifier = pipeline("text-classification", model=model)
    output = []
    labels = {}
    for input_text in input_list:
        result = classifier(input_text)
        curr = {}
        curr['Input'] = input_text
        label = result[0]['label']
        curr['Label'] = label
        score = result[0]['score']
        curr['Score'] = score
        if label in labels:
            labels[label].append(curr)
        else:
            labels[label] = [curr]
    for label in labels:
        sort_list = sorted(labels[label], key=lambda item: item['Score'], reverse=True)
        output += sort_list
    return output
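
# A rough sketch of what run_ranked returns (made-up scores; actual label
# strings depend on the chosen model):
#   run_ranked("cardiffnlp/twitter-roberta-base-hate",
#              ["I'd rather die than date [IDENTITY].", "Good morning."])
#   # -> [{'Input': "I'd rather die than date [IDENTITY].", 'Label': 'HATE', 'Score': 0.83},
#   #     {'Input': 'Good morning.', 'Label': 'NOT-HATE', 'Score': 0.97}]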


# Takes in two model names and returns the output of both models for the
# given input string
def run_compare(name_1, name_2, text):
    classifier_1 = pipeline("text-classification", model=name_1)
    result_1 = classifier_1(text)
    out_1 = {}
    out_1['Model'] = name_1
    out_1['Label'] = result_1[0]['label']
    out_1['Score'] = result_1[0]['score']
    classifier_2 = pipeline("text-classification", model=name_2)
    result_2 = classifier_2(text)
    out_2 = {}
    out_2['Model'] = name_2
    out_2['Label'] = result_2[0]['label']
    out_2['Score'] = result_2[0]['score']
    return [out_1, out_2]
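
# Illustrative return shape for run_compare (placeholder scores; each model
# reports its own label strings, so the two rows may use different names):
#   run_compare("cardiffnlp/twitter-roberta-base-hate",
#               "Hate-speech-CNERG/dehatebert-mono-english",
#               "Good morning.")
#   # -> [{'Model': 'cardiffnlp/twitter-roberta-base-hate', 'Label': 'NOT-HATE', 'Score': 0.97},
#   #     {'Model': 'Hate-speech-CNERG/dehatebert-mono-english', 'Label': 'NON_HATE', 'Score': 0.95}]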


def main():
    run_article()


main()