import streamlit as st
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

title = "Model Exploration"
description = "Comparison of hate speech detection models"
date = "2022-01-26"
thumbnail = "images/huggingface_logo.png"


# Creates the forms for receiving multiple inputs to compare for a single
# model or one input to compare for two models
def run_article():
    st.markdown("""
# Making a Hate Speech Detection Model

Once the data has been collected using the definitions identified for the
task, you can start training your model. During training, the model takes in
the labeled data and learns the context associated with each label in the
input text. Depending on the task design, the labels may be binary, like
'hateful' and 'non-hateful', or multiclass, like 'neutral', 'offensive', and
'attack'.

When presented with a new input string, the model predicts the likelihood
that the input belongs to each of the available labels, and returns the label
with the highest likelihood along with a confidence score from 0 to 1.

Neural models such as transformers are frequently trained as general language
models and then fine-tuned on specific classification tasks. These models can
vary in their architectures and optimization algorithms, sometimes producing
very different outputs for the same input text.

# Model Output Ranking

For now, here's a link to the [space](https://huggingface.co/spaces/aymm/ModelOutputRankingTool).""")
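    # For reference, a transformers text-classification pipeline returns a
    # list with one dict per call, holding the top label and its score.
    # Illustrative shape only; actual label strings depend on the model:
    #   classifier = pipeline("text-classification",
    #                         model="cardiffnlp/twitter-roberta-base-hate")
    #   classifier("Good morning.")
    #   # -> [{'label': 'NOT-HATE', 'score': 0.97}]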
    with st.expander("Model Output Ranking Tool", expanded=False):
        with st.form(key='ranking'):
            model_name = st.selectbox("Select a model to test",
                                      ["classla/roberta-base-frenk-hate",
                                       "cardiffnlp/twitter-roberta-base-hate",
                                       "Hate-speech-CNERG/dehatebert-mono-english"],
                                      key="rank_model_select"
                                      )
            # The placeholder keyword was added in v1.2 of Streamlit, and the
            # versions available on Spaces currently only go up to v1.0
            input_1 = st.text_input("Input 1",
                                    # placeholder="We shouldn't let [IDENTITY] suffer.",
                                    help="Try a phrase like 'We shouldn't let [IDENTITY] suffer.'",
                                    key="rank_input_1")
            input_2 = st.text_input("Input 2",
                                    # placeholder="I'd rather die than date [IDENTITY].",
                                    help="Try a phrase like 'I'd rather die than date [IDENTITY].'",
                                    key="rank_input_2")
            input_3 = st.text_input("Input 3",
                                    # placeholder="Good morning.",
                                    help="Try a phrase like 'Good morning.'",
                                    key="rank_input_3")
            inputs = [input_1, input_2, input_3]
            if st.form_submit_button(label="Rank inputs"):
                results = run_ranked(model_name, inputs)
                st.dataframe(results)
    st.markdown("""
# Model Comparison

For now, here's a link to the [space](https://huggingface.co/spaces/aymm/ModelComparisonTool).
""")
    with st.expander("Model Comparison Tool", expanded=False):
        with st.form(key='comparison'):
            model_name_1 = st.selectbox("Select a model to compare",
                                        ["cardiffnlp/twitter-roberta-base-hate",
                                         "Hate-speech-CNERG/dehatebert-mono-english",
                                         ],
                                        key='compare_model_1'
                                        )
            model_name_2 = st.selectbox("Select another model to compare",
                                        ["cardiffnlp/twitter-roberta-base-hate",
                                         "Hate-speech-CNERG/dehatebert-mono-english",
                                         ],
                                        key='compare_model_2'
                                        )
            input_text = st.text_input("Comparison input",
                                       key="compare_input")
            if st.form_submit_button(label="Compare models"):
                results = run_compare(model_name_1, model_name_2, input_text)
                st.dataframe(results)


# Runs the received input strings through the given model and returns the
# outputs grouped by label, with each group sorted by score, highest first
# (does not assume binary labels)
def run_ranked(model, input_list):
    classifier = pipeline("text-classification", model=model)
    output = []
    labels = {}
    for input_text in input_list:
        result = classifier(input_text)
        curr = {}
        curr['Input'] = input_text
        label = result[0]['label']
        curr['Label'] = label
        score = result[0]['score']
        curr['Score'] = score
        if label in labels:
            labels[label].append(curr)
        else:
            labels[label] = [curr]
    for label in labels:
        sort_list = sorted(labels[label], key=lambda item: item['Score'], reverse=True)
        output += sort_list
    return output
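
# A rough sketch of what run_ranked returns (made-up scores; actual label
# strings depend on the chosen model):
#   run_ranked("cardiffnlp/twitter-roberta-base-hate",
#              ["I'd rather die than date [IDENTITY].", "Good morning."])
#   # -> [{'Input': "I'd rather die than date [IDENTITY].", 'Label': 'HATE', 'Score': 0.83},
#   #     {'Input': 'Good morning.', 'Label': 'NOT-HATE', 'Score': 0.97}]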


# Takes in two model names and returns the output of both models for the
# given input string
def run_compare(name_1, name_2, text):
    classifier_1 = pipeline("text-classification", model=name_1)
    result_1 = classifier_1(text)
    out_1 = {}
    out_1['Model'] = name_1
    out_1['Label'] = result_1[0]['label']
    out_1['Score'] = result_1[0]['score']
    classifier_2 = pipeline("text-classification", model=name_2)
    result_2 = classifier_2(text)
    out_2 = {}
    out_2['Model'] = name_2
    out_2['Label'] = result_2[0]['label']
    out_2['Score'] = result_2[0]['score']
    return [out_1, out_2]
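
# Illustrative return shape for run_compare (placeholder scores; each model
# reports its own label strings, so the two rows may use different names):
#   run_compare("cardiffnlp/twitter-roberta-base-hate",
#               "Hate-speech-CNERG/dehatebert-mono-english",
#               "Good morning.")
#   # -> [{'Model': 'cardiffnlp/twitter-roberta-base-hate', 'Label': 'NOT-HATE', 'Score': 0.97},
#   #     {'Model': 'Hate-speech-CNERG/dehatebert-mono-english', 'Label': 'NON_HATE', 'Score': 0.95}]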


def main():
    run_article()


main()