Spaces:

emilylearning
/

choose_your_own_spurious

Runtime error

App Files Files Community

choose_your_own_spurious / app.py

emilylearning

format file and remove share=True

08879a1 over 2 years ago

raw

history blame contribute delete

11.7 kB

	# !pip install gradio -q
	# !pip install transformers -q

	# %%
	import gradio as gr
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import random
	from matplotlib.ticker import MaxNLocator
	from transformers import pipeline

	# %%
	# Fill-mask checkpoints that are loaded once at start-up and offered in the UI.
	MODEL_NAMES = [
	    "bert-base-uncased",
	    "roberta-base",
	    "bert-large-uncased",
	    "roberta-large",
	]
	# Radio-button value meaning "use the model name typed by the user instead".
	OWN_MODEL_NAME = "add-a-model"

	# Number of decimals kept when rounding reported percentages.
	DECIMAL_PLACES = 1
	# Small constant added to denominators to avoid division-by-zero errors.
	EPS = 1e-5
	# %%

	# Fire up the models: build one fill-mask pipeline per pre-selected checkpoint,
	# keyed by its model name, so requests can reuse them.
	models = {
	    checkpoint: pipeline("fill-mask", model=checkpoint)
	    for checkpoint in MODEL_NAMES
	}

	# %%


	def clean_tokens(tokens):
	    """Normalize raw comma-split tokens so they can be matched case-insensitively.

	    Each token is stripped of surrounding whitespace and lower-cased, because the
	    callers compare these tokens against lower-cased words and predictions.
	    Entries that are empty after stripping (e.g. from a trailing comma) are dropped.

	    Args:
	        tokens: iterable of strings.

	    Returns:
	        A new list of cleaned, non-empty tokens in their original order.
	    """
	    normalized = (token.strip().lower() for token in tokens)
	    return [token for token in normalized if token]


	def prepare_text_for_masking(input_text, mask_token, gendered_tokens, split_key):
	    """Mask the target words of ``input_text`` and cut it at the place-holder.

	    The text is split on whitespace; every word whose lower-cased form is in
	    ``gendered_tokens`` is replaced by ``mask_token``. The words are re-joined with
	    single spaces and the result is split on ``split_key``.

	    Args:
	        input_text: the raw sentence.
	        mask_token: the string that stands for a masked position.
	        gendered_tokens: collection of (lower-case) words to mask.
	        split_key: place-holder substring at which the masked text is cut; when
	            empty, the text is returned as a single segment.

	    Returns:
	        ``(text_portions, num_masks)``: the list of text segments around the
	        place-holder and the number of words equal to ``mask_token``.
	    """
	    targets = set(gendered_tokens)
	    masked_words = [
	        mask_token if word.lower() in targets else word
	        for word in input_text.split()
	    ]
	    num_masks = masked_words.count(mask_token)

	    masked_text = " ".join(masked_words)
	    # str.split raises ValueError on an empty separator, so an unset place-holder
	    # yields the whole text as one segment instead of crashing.
	    text_portions = masked_text.split(split_key) if split_key else [masked_text]
	    return text_portions, num_masks


	def get_avg_prob_from_pipeline_outputs(mask_filled_text, gendered_token, num_preds):
	    """Return the average percentage of prediction score that falls on a token group.

	    ``mask_filled_text`` is iterated as a sequence of per-mask prediction lists,
	    each prediction being a mapping with ``"score"`` and ``"token_str"`` entries.
	    Scores of predictions whose stripped, lower-cased token is in
	    ``gendered_token`` are summed over all masks, divided by
	    ``EPS + num_preds``, scaled to a percentage and rounded to
	    ``DECIMAL_PLACES`` decimals.
	    """
	    total_score = 0
	    for top_preds in mask_filled_text:
	        mask_score = 0
	        for candidate in top_preds:
	            predicted = candidate["token_str"].strip().lower()
	            mask_score += candidate["score"] if predicted in gendered_token else 0.0
	        total_score += mask_score

	    percentage = total_score / (EPS + num_preds) * 100
	    return round(percentage, DECIMAL_PLACES)


	def get_figure(df, gender, n_fit=1):
	    """Plot one group's percentages against the injected values with a polynomial fit.

	    Args:
	        df: frame with an ``"x-axis"`` column (used as the index) and one value
	            column; the first remaining column supplies the y values.
	        gender: group name shown in the plot title.
	        n_fit: degree of the fitted polynomial.

	    Returns:
	        The figure created by ``plt.subplots()``. It shows the data points, the
	        fitted curve and, when the covariance of the fit can be estimated, a
	        shaded one-standard-deviation band around the curve.
	    """
	    df = df.set_index("x-axis")
	    cols = df.columns
	    # The injected values are categorical, so they are fitted by position.
	    xs = list(range(len(df)))
	    ys = df[cols[0]]
	    fig, ax = plt.subplots()
	    # Trying small fig due to rendering issues on HF, not on VS Code
	    fig.set_figheight(3)
	    fig.set_figwidth(9)

	    # np.polyfit raises ValueError when there are too few points to scale the
	    # covariance for the requested degree; fall back to a plain fit and skip
	    # the uncertainty band instead of failing the whole request.
	    try:
	        p, C_p = np.polyfit(xs, ys, n_fit, cov=True)
	    except ValueError:
	        p, C_p = np.polyfit(xs, ys, n_fit), None

	    t = np.linspace(min(xs) - 1, max(xs) + 1, 10 * len(xs))
	    # One row per sample of t, one column per polynomial power (highest first).
	    TT = np.vstack([t ** (n_fit - i) for i in range(n_fit + 1)]).T

	    # matrix multiplication calculates the polynomial values
	    yi = np.dot(TT, p)
	    if C_p is not None:
	        C_yi = np.dot(TT, np.dot(C_p, TT.T))  # C_y = TT C_p TT.T
	        sig_yi = np.sqrt(np.diag(C_yi))  # Standard deviations are sqrt of diagonal
	        ax.fill_between(t, yi + sig_yi, yi - sig_yi, alpha=0.25)

	    ax.plot(t, yi, "-")
	    ax.plot(df, "ro")
	    ax.legend(list(df.columns))

	    ax.axis("tight")
	    ax.set_xlabel("Value injected into input text")
	    ax.set_title(f"Probability of predicting {gender} tokens.")
	    ax.set_ylabel("Softmax prob")
	    ax.tick_params(axis="x", labelrotation=5)
	    ax.set_ylim(0, 100)
	    return fig


	# %%
	def predict_masked_tokens(
	    model_name,
	    own_model_name,
	    group_a_tokens,
	    group_b_tokens,
	    indie_vars,
	    split_key,
	    normalizing,
	    n_fit,
	    input_text,
	):
	    """Measure how each injected value shifts masked predictions toward group A or B.

	    Words of ``input_text`` that belong to either token group are replaced by the
	    model's mask token, and ``split_key`` is replaced in turn by every value in
	    ``indie_vars``. For each resulting text the fill-mask prediction scores are
	    accumulated per group.

	    Args:
	        model_name: name picked in the UI; any value not in ``MODEL_NAMES`` means
	            a pipeline is built for ``own_model_name`` instead.
	        own_model_name: model name handed to ``pipeline`` in that case.
	        group_a_tokens: comma separated words forming group A.
	        group_b_tokens: comma separated words forming group B.
	        indie_vars: comma separated values injected at the place-holder; they also
	            label the x-axis.
	        split_key: place-holder substring in ``input_text``.
	        normalizing: when truthy, each group's percentage is rescaled relative to
	            the two groups' combined percentage.
	        n_fit: polynomial degree forwarded to ``get_figure``.
	        input_text: text containing the words to mask and the place-holder.

	    Returns:
	        ``(display_text, group_a_fig, group_b_fig, results_df)``: the masked text
	        with one randomly chosen value injected, one figure per group, and a
	        frame with columns ``"x-axis"``, ``"group_a"`` and ``"group_b"``.
	    """
	    if model_name not in MODEL_NAMES:
	        # Not one of the pre-loaded pipelines: build one for the user's model.
	        model = pipeline("fill-mask", model=own_model_name)
	    else:
	        model = models[model_name]

	    mask_token = model.tokenizer.mask_token

	    # Strip the whitespace typed after each comma so it is neither injected into
	    # the text nor shown in the axis labels.
	    indie_vars_list = [value.strip() for value in indie_vars.split(",")]

	    group_a_tokens = clean_tokens(group_a_tokens.split(","))
	    group_b_tokens = clean_tokens(group_b_tokens.split(","))

	    text_segments, num_preds = prepare_text_for_masking(
	        input_text, mask_token, group_b_tokens + group_a_tokens, split_key
	    )

	    group_a_preds = []
	    group_b_preds = []
	    for indie_var in indie_vars_list:
	        target_text = indie_var.join(text_segments)
	        mask_filled_text = model(target_text)
	        # Quick hack: the return type depends on how many MASKs are in the text,
	        # so wrap a flat result to always get one prediction list per mask.
	        if not isinstance(mask_filled_text[0], list):
	            mask_filled_text = [mask_filled_text]

	        group_a_preds.append(
	            get_avg_prob_from_pipeline_outputs(
	                mask_filled_text, group_a_tokens, num_preds
	            )
	        )
	        group_b_preds.append(
	            get_avg_prob_from_pipeline_outputs(
	                mask_filled_text, group_b_tokens, num_preds
	            )
	        )

	    if normalizing:
	        # EPS keeps the division defined when neither group received any score.
	        total_group_probs = np.add(group_a_preds, group_b_preds)
	        group_a_preds = np.around(
	            np.divide(group_a_preds, total_group_probs + EPS) * 100,
	            decimals=DECIMAL_PLACES,
	        )
	        group_b_preds = np.around(
	            np.divide(group_b_preds, total_group_probs + EPS) * 100,
	            decimals=DECIMAL_PLACES,
	        )

	    results_df = pd.DataFrame({"x-axis": indie_vars_list})
	    results_df["group_a"] = group_a_preds
	    results_df["group_b"] = group_b_preds
	    group_a_fig = get_figure(
	        results_df.drop("group_b", axis=1),
	        "group_a",
	        n_fit,
	    )
	    group_b_fig = get_figure(
	        results_df.drop("group_a", axis=1),
	        "group_b",
	        n_fit,
	    )
	    # Show one arbitrary variant of the text that was fed to the model.
	    display_text = random.choice(indie_vars_list).join(text_segments)

	    return (
	        display_text,
	        group_a_fig,
	        group_b_fig,
	        results_df,
	    )


	# Shared example values, in the order the example buttons write them to the
	# input components; the input sentence itself is appended per example.
	truck_fn_example = [
	    MODEL_NAMES[2],  # model_name
	    "",  # own_model_name
	    "truck, pickup",  # group_a_tokens
	    "car, sedan",  # group_b_tokens
	    "city, neighborhood, farm",  # x_axis
	    "PLACE",  # place_holder
	    "True",  # to_normalize
	    1,  # n_fit
	]


	def truck_1_fn():
	    """Return the shared example values followed by the first example sentence."""
	    sentence = "He loaded up his truck and drove to the PLACE."
	    return [*truck_fn_example, sentence]


	def truck_2_fn():
	    """Return the shared example values followed by the second example sentence."""
	    sentence = "He loaded up the bed of his truck and drove to the PLACE."
	    return [*truck_fn_example, sentence]


	# # %%


	# Build the Gradio page: instructions, example buttons, input fields (lettered
	# A-I to match the instructions), outputs, and the event wiring.
	demo = gr.Blocks()
	with demo:
	    gr.Markdown("# Spurious Correlation Evaluation for Pre-trained LLMs")

	    gr.Markdown("## Instructions for this Demo")
	    gr.Markdown(
	        "1) Click on one of the examples below to pre-populate the input fields."
	    )
	    gr.Markdown(
	        "2) Check out the pre-populated fields as you scroll down to the ['Hit Submit...'] button!"
	    )
	    gr.Markdown(
	        "3) Repeat steps (1) and (2) with more pre-populated inputs or with your own values in the input fields!"
	    )

	    gr.Markdown(
	        """The pre-populated inputs below are for a demo example of a location-vs-vehicle-type spurious correlation.
	We can see this spurious correlation largely disappears in the well-specified example text.

	<p align="center">
	<img src="file/non_well_spec.png" alt="results" width="300"/>
	</p>


	<p align="center">
	<img src="file/well_spec.png" alt="results" width="300"/>
	</p>
	"""
	    )

	    gr.Markdown("## Example inputs")
	    gr.Markdown(
	        "Click a button below to pre-populate input fields with example values. Then scroll down to Hit Submit to generate predictions."
	    )
	    # Each "<--" note sits in the same row as the button it describes.
	    with gr.Row():
	        truck_1_gen = gr.Button(
	            "Click for non-well-specified(?) vehicle-type example inputs"
	        )
	        gr.Markdown(
	            "<-- Multiple solutions with low training error. LLM sensitive to spurious(?) correlations."
	        )

	        truck_2_gen = gr.Button("Click for well-specified vehicle-type example inputs")
	        gr.Markdown(
	            "<-- Fewer solutions with low training error. LLM less sensitive to spurious(?) correlations."
	        )

	    gr.Markdown("## Input fields")
	    gr.Markdown(
	        "A) and B) Pick two groups of comma separated words to mask, and C) a spectrum of comma separated values for text injection and x-axis."
	    )

	    with gr.Row():
	        group_a_tokens = gr.Textbox(
	            type="text",
	            lines=3,
	            label="A) To-MASK tokens A: Comma separated words that account for accumulated group A softmax probs",
	        )

	        group_b_tokens = gr.Textbox(
	            type="text",
	            lines=3,
	            label="B) To-MASK tokens B: Comma separated words that account for accumulated group B softmax probs",
	        )

	    with gr.Row():
	        x_axis = gr.Textbox(
	            type="text",
	            lines=3,
	            label="C) Comma separated values for text injection and x-axis",
	        )

	    gr.Markdown("D) Pick a pre-loaded BERT-family model of interest on the right.")
	    gr.Markdown(
	        f"Or E) select `{OWN_MODEL_NAME}`, then add the name of any other Hugging Face model that supports the [fill-mask](https://huggingface.co/models?pipeline_tag=fill-mask) task on the right (note: this may take some time to load)."
	    )

	    with gr.Row():
	        model_name = gr.Radio(
	            MODEL_NAMES + [OWN_MODEL_NAME],
	            type="value",
	            label="D) BERT-like model.",
	        )
	        own_model_name = gr.Textbox(
	            label="E) If you selected an 'add-a-model' model, put any Hugging Face pipeline model name (that supports the fill-mask task) here.",
	        )

	    gr.Markdown(
	        "F) Pick if you want the predictions normalized to only those from group A or B."
	    )
	    gr.Markdown(
	        "G) Also tell the demo what special token you will use in your input text, that you would like replaced with the spectrum of values you listed above."
	    )
	    gr.Markdown(
	        "And H) the degree of polynomial fit used for high-lighting potential spurious association."
	    )

	    with gr.Row():
	        # type="index" hands the handler the position of the choice, so
	        # "False" arrives as 0 and "True" as 1.
	        to_normalize = gr.Dropdown(
	            ["False", "True"],
	            label="F) Normalize model's predictions?",
	            type="index",
	        )
	        place_holder = gr.Textbox(
	            label="G) Special token place-holder",
	        )
	        n_fit = gr.Dropdown(
	            list(range(1, 5)),
	            label="H) Degree of polynomial fit",
	            type="value",
	        )

	    gr.Markdown(
	        "I) Finally, add input text that includes at least one of the '`To-MASK`' tokens from (A) or (B) and one place-holder token from (G)."
	    )

	    with gr.Row():
	        input_text = gr.Textbox(
	            lines=2,
	            label="I) Input text with a '`To-MASK`' and place-holder token",
	        )

	    gr.Markdown("## Outputs!")
	    with gr.Row():
	        btn = gr.Button("Hit submit to generate predictions!")

	    with gr.Row():
	        sample_text = gr.Textbox(
	            type="text", label="Output text: Sample of text fed to model"
	        )
	    # NOTE(review): `type="auto"` and `overflow_row_behaviour` look like options
	    # from an older Gradio release — confirm the pinned version still accepts them.
	    with gr.Row():
	        female_fig = gr.Plot(type="auto")
	        male_fig = gr.Plot(type="auto")
	    with gr.Row():
	        df = gr.Dataframe(
	            show_label=True,
	            overflow_row_behaviour="show_ends",
	            label="Table of softmax probability for grouped predictions",
	        )

	    # The example buttons fill exactly the components that the submit button
	    # reads, in the parameter order of predict_masked_tokens.
	    input_fields = [
	        model_name,
	        own_model_name,
	        group_a_tokens,
	        group_b_tokens,
	        x_axis,
	        place_holder,
	        to_normalize,
	        n_fit,
	        input_text,
	    ]

	    with gr.Row():
	        truck_1_gen.click(truck_1_fn, inputs=[], outputs=input_fields)
	        truck_2_gen.click(truck_2_fn, inputs=[], outputs=input_fields)
	        btn.click(
	            predict_masked_tokens,
	            inputs=input_fields,
	            outputs=[sample_text, female_fig, male_fig, df],
	        )

	demo.launch(debug=True)

	# %%