Spaces:
Runtime error
Runtime error
| # !pip install gradio -q | |
| # !pip install transformers -q | |
| # %% | |
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import pandas as pd | |
| import random | |
| from matplotlib.ticker import MaxNLocator | |
| from transformers import pipeline | |
| # %% | |
# Checkpoints that are loaded as fill-mask pipelines at start-up and offered
# as choices in the model picker.
MODEL_NAMES = ["bert-base-uncased", "roberta-base", "bert-large-uncased", "roberta-large"]

# Extra picker choice meaning "use the checkpoint name typed by the user".
OWN_MODEL_NAME = "add-a-model"

# Number of decimals kept when percentages are rounded.
DECIMAL_PLACES = 1

# Small constant added to denominators to avoid /0 errors.
EPS = 0.00001
| # %% | |
# Fire up the models: build one fill-mask pipeline per pre-loaded checkpoint,
# keyed by checkpoint name, so they can be looked up by the selected name.
models = {
    checkpoint: pipeline("fill-mask", model=checkpoint) for checkpoint in MODEL_NAMES
}
| # %% | |
def clean_tokens(tokens):
    """Normalize user-supplied tokens so they can be matched reliably.

    Each token is stripped of surrounding whitespace and lower-cased, because
    the code that consumes these tokens compares them against lower-cased
    words and lower-cased model predictions. Entries that are empty after
    stripping (e.g. produced by a trailing comma) are dropped.

    Args:
        tokens: Iterable of raw token strings.

    Returns:
        List of cleaned, non-empty, lower-cased tokens in their original order.
    """
    stripped = (token.strip() for token in tokens)
    return [token.lower() for token in stripped if token]
def prepare_text_for_masking(input_text, mask_token, gendered_tokens, split_key):
    """Mask the target words of ``input_text`` and split it at ``split_key``.

    The text is split on whitespace; every word whose lower-cased form is in
    ``gendered_tokens`` is replaced by ``mask_token``. The words are then
    re-joined with single spaces and the result is split on ``split_key``.

    Returns:
        A ``(text_portions, num_masks)`` tuple: the pieces of text surrounding
        each ``split_key`` occurrence, and the number of words equal to
        ``mask_token`` after the replacement.
    """
    masked_words = []
    for word in input_text.split():
        if word.lower() in gendered_tokens:
            masked_words.append(mask_token)
        else:
            masked_words.append(word)

    num_masks = masked_words.count(mask_token)
    masked_text = " ".join(masked_words)
    return masked_text.split(split_key), num_masks
def get_avg_prob_from_pipeline_outputs(mask_filled_text, gendered_token, num_preds):
    """Return the average percentage of predicted probability on given tokens.

    ``mask_filled_text`` is iterated as one collection of predictions per mask;
    each prediction is indexed by ``"score"`` and ``"token_str"``. For every
    mask, the scores of the predictions whose stripped, lower-cased
    ``token_str`` is in ``gendered_token`` are summed. The per-mask sums are
    added up, divided by ``num_preds`` (plus EPS, so a zero count does not
    divide by zero), scaled to a percentage and rounded to DECIMAL_PLACES.
    """
    per_mask_mass = []
    for top_preds in mask_filled_text:
        matched_scores = [
            pred["score"] if pred["token_str"].strip().lower() in gendered_token else 0.0
            for pred in top_preds
        ]
        per_mask_mass.append(sum(matched_scores))

    mean_mass = sum(per_mask_mass) / (EPS + num_preds)
    return round(mean_mass * 100, DECIMAL_PLACES)
def get_figure(df, gender, n_fit=1):
    """Plot one series of predictions with a polynomial fit and a 1-sigma band.

    Args:
        df: Frame with an "x-axis" column (used as the index) and at least one
            value column; the first remaining column is the one that is fitted.
        gender: Label inserted into the figure title.
        n_fit: Degree of the polynomial fitted through the points.

    Returns:
        The matplotlib figure holding the points, the fitted curve and the
        band of one standard deviation around it.
    """
    indexed = df.set_index("x-axis")
    positions = list(range(len(indexed)))
    values = indexed[indexed.columns[0]]

    fig, ax = plt.subplots()
    # Trying small fig due to rendering issues on HF, not on VS Code
    fig.set_size_inches(9, 3)

    # Fit the polynomial and also get the covariance of its coefficients.
    coeffs, coeff_cov = np.polyfit(positions, values, n_fit, cov=True)

    # Dense grid reaching one unit beyond both ends of the data positions.
    grid = np.linspace(min(positions) - 1, max(positions) + 1, 10 * len(positions))
    # One column per power of the grid, from grid**n_fit down to grid**0.
    design = np.vstack([grid ** (n_fit - power) for power in range(n_fit + 1)]).T

    # Matrix multiplication evaluates the polynomial on the grid.
    fitted = design.dot(coeffs)
    # Propagate the coefficient covariance to the fitted values: C_y = T C_p T^T.
    fitted_cov = design.dot(coeff_cov.dot(design.T))
    # Standard deviations are the square roots of the diagonal.
    fitted_std = np.sqrt(np.diag(fitted_cov))

    ax.fill_between(grid, fitted + fitted_std, fitted - fitted_std, alpha=0.25)
    ax.plot(grid, fitted, "-")
    ax.plot(indexed, "ro")
    ax.legend(list(indexed.columns))
    ax.axis("tight")
    ax.set_xlabel("Value injected into input text")
    ax.set_title(f"Probability of predicting {gender} tokens.")
    ax.set_ylabel("Softmax prob")
    ax.tick_params(axis="x", labelrotation=5)
    ax.set_ylim(0, 100)
    return fig
| # %% | |
def predict_masked_tokens(
    model_name,
    own_model_name,
    group_a_tokens,
    group_b_tokens,
    indie_vars,
    split_key,
    normalizing,
    n_fit,
    input_text,
):
    """Run fill-mask inference on ``input_text`` for a spectrum of injected values.

    Words of ``input_text`` that belong to group A or group B are replaced by
    the model's mask token, and ``split_key`` is replaced in turn by each value
    listed in ``indie_vars``. For every resulting text, the percentage of the
    model's predicted probability that falls on group A tokens and on group B
    tokens is recorded.

    Args:
        model_name: Name of a pre-loaded model; any other value means the
            model named by ``own_model_name`` is loaded on demand.
        own_model_name: Checkpoint name used when ``model_name`` is not one of
            the pre-loaded models.
        group_a_tokens: Comma separated tokens counted towards group A.
        group_b_tokens: Comma separated tokens counted towards group B.
        indie_vars: Comma separated values injected in place of ``split_key``;
            they also label the x-axis.
        split_key: Place-holder text in ``input_text`` to be replaced.
        normalizing: If truthy, each group's percentage is rescaled relative to
            the combined total of both groups.
        n_fit: Degree of the polynomial fit drawn in the figures.
        input_text: Text containing the to-mask tokens and the place-holder.

    Returns:
        A tuple ``(display_text, group_a_fig, group_b_fig, results_df)``: one
        randomly chosen example of the text fed to the model, one figure per
        group, and the table of percentages per injected value.

    Raises:
        ValueError: If ``indie_vars`` contains no non-empty value.
    """
    if model_name not in MODEL_NAMES:
        model = pipeline("fill-mask", model=own_model_name)
    else:
        model = models[model_name]
    mask_token = model.tokenizer.mask_token

    # Strip every injected value: lists are typically written "a, b, c", and
    # the space after each comma would otherwise leak into the model input
    # (as a double space) and into the x-axis labels. Empty entries, e.g. from
    # a trailing comma, are ignored.
    indie_vars_list = [value.strip() for value in indie_vars.split(",")]
    indie_vars_list = [value for value in indie_vars_list if value]
    if not indie_vars_list:
        raise ValueError("Provide at least one value to inject into the input text.")

    group_a_tokens = clean_tokens(group_a_tokens.split(","))
    group_b_tokens = clean_tokens(group_b_tokens.split(","))

    text_segments, num_preds = prepare_text_for_masking(
        input_text, mask_token, group_b_tokens + group_a_tokens, split_key
    )

    group_a_preds = []
    group_b_preds = []
    for indie_var in indie_vars_list:
        target_text = indie_var.join(text_segments)
        mask_filled_text = model(target_text)
        # The pipeline returns a flat list of predictions for a single mask and
        # a list of such lists for several masks; normalize to the nested form.
        if not isinstance(mask_filled_text[0], list):
            mask_filled_text = [mask_filled_text]

        group_a_preds.append(
            get_avg_prob_from_pipeline_outputs(
                mask_filled_text, group_a_tokens, num_preds
            )
        )
        group_b_preds.append(
            get_avg_prob_from_pipeline_outputs(
                mask_filled_text, group_b_tokens, num_preds
            )
        )

    if normalizing:
        # Express each group as a share of the two groups combined; EPS keeps
        # the division defined when neither group received any probability.
        total_probs = np.add(group_a_preds, group_b_preds)
        group_a_preds = np.around(
            np.divide(group_a_preds, total_probs + EPS) * 100,
            decimals=DECIMAL_PLACES,
        )
        group_b_preds = np.around(
            np.divide(group_b_preds, total_probs + EPS) * 100,
            decimals=DECIMAL_PLACES,
        )

    results_df = pd.DataFrame({"x-axis": indie_vars_list})
    results_df["group_a"] = group_a_preds
    results_df["group_b"] = group_b_preds

    # Each figure gets a frame holding only its own group's column.
    group_a_fig = get_figure(
        results_df.drop("group_b", axis=1),
        "group_a",
        n_fit,
    )
    group_b_fig = get_figure(
        results_df.drop("group_a", axis=1),
        "group_b",
        n_fit,
    )

    # Show one randomly chosen instance of the text that was fed to the model.
    display_text = random.choice(indie_vars_list).join(text_segments)
    return (
        display_text,
        group_a_fig,
        group_b_fig,
        results_df,
    )
# Example values shared by the vehicle-type examples. Each example function
# appends its own input text to a copy of this list.
truck_fn_example = [
    MODEL_NAMES[2],
    "",
    "truck, pickup",
    "car, sedan",
    "city, neighborhood, farm",
    "PLACE",
    "True",
    1,
]
def truck_1_fn():
    """Return the shared example values followed by the first example text."""
    return [*truck_fn_example, "He loaded up his truck and drove to the PLACE."]
def truck_2_fn():
    """Return the shared example values followed by the second example text."""
    example_text = "He loaded up the bed of his truck and drove to the PLACE."
    return [*truck_fn_example, example_text]
| # # %% | |
# Build the Gradio UI. Components are created in display order; the event
# handlers at the end connect the buttons to the example functions and to
# predict_masked_tokens.
demo = gr.Blocks()
with demo:
    gr.Markdown("# Spurious Correlation Evaluation for Pre-trained LLMs")
    gr.Markdown("## Instructions for this Demo")
    gr.Markdown(
        "1) Click on one of the examples below to pre-populate the input fields."
    )
    gr.Markdown(
        "2) Check out the pre-populated fields as you scroll down to the ['Hit Submit...'] button!"
    )
    gr.Markdown(
        "3) Repeat steps (1) and (2) with more pre-populated inputs or with your own values in the input fields!"
    )
    gr.Markdown(
        "The pre-populated inputs below are for a demo example of a location-vs-vehicle-type spurious correlation.\n"
        "We can see this spurious correlation largely disappears in the well-specified example text.\n"
        '<p align="center">\n'
        '<img src="file/non_well_spec.png" alt="results" width="300"/>\n'
        "</p>\n"
        '<p align="center">\n'
        '<img src="file/well_spec.png" alt="results" width="300"/>\n'
        "</p>\n"
    )

    gr.Markdown("## Example inputs")
    gr.Markdown(
        "Click a button below to pre-populate input fields with example values. Then scroll down to Hit Submit to generate predictions."
    )
    with gr.Row():
        truck_1_gen = gr.Button(
            "Click for non-well-specified(?) vehicle-type example inputs"
        )
        gr.Markdown(
            "<-- Multiple solutions with low training error. LLM sensitive to spurious(?) correlations."
        )
        truck_2_gen = gr.Button("Click for well-specified vehicle-type example inputs")
        gr.Markdown(
            "<-- Fewer solutions with low training error. LLM less sensitive to spurious(?) correlations."
        )

    gr.Markdown("## Input fields")
    # The field letters in the instructions must match the field labels below.
    gr.Markdown(
        "A) and B) List the comma separated to-MASK tokens for each group, and C) pick a spectrum of comma separated values for text injection and x-axis."
    )
    # NOTE(review): keyword arguments such as `type="text"` on Textbox,
    # `type="auto"` on Plot and `overflow_row_behaviour` on Dataframe are kept
    # exactly as written; they may not be accepted by every Gradio release -
    # confirm against the version this app is deployed with.
    with gr.Row():
        group_a_tokens = gr.Textbox(
            type="text",
            lines=3,
            label="A) To-MASK tokens A: Comma separated words that account for accumulated group A softmax probs",
        )
        group_b_tokens = gr.Textbox(
            type="text",
            lines=3,
            label="B) To-MASK tokens B: Comma separated words that account for accumulated group B softmax probs",
        )
    with gr.Row():
        x_axis = gr.Textbox(
            type="text",
            lines=3,
            label="C) Comma separated values for text injection and x-axis",
        )

    gr.Markdown("D) Pick a pre-loaded BERT-family model of interest on the right.")
    gr.Markdown(
        f"Or E) select `{OWN_MODEL_NAME}`, then add the name of any other Hugging Face model that supports the [fill-mask](https://huggingface.co/models?pipeline_tag=fill-mask) task on the right (note: this may take some time to load)."
    )
    with gr.Row():
        model_name = gr.Radio(
            MODEL_NAMES + [OWN_MODEL_NAME],
            type="value",
            label="D) BERT-like model.",
        )
        own_model_name = gr.Textbox(
            label="E) If you selected an 'add-a-model' model, put any Hugging Face pipeline model name (that supports the fill-mask task) here.",
        )

    gr.Markdown(
        "F) Pick if you want the predictions normalized to only those from group A or B."
    )
    gr.Markdown(
        "G) Also tell the demo what special token you will use in your input text, that you would like replaced with the spectrum of values you listed above."
    )
    gr.Markdown(
        "And H) the degree of polynomial fit used for high-lighting potential spurious association."
    )
    with gr.Row():
        # type="index" makes the handler receive the position of the selected
        # choice, so "False" arrives as 0 and "True" as 1.
        to_normalize = gr.Dropdown(
            ["False", "True"],
            label="F) Normalize model's predictions?",
            type="index",
        )
        place_holder = gr.Textbox(
            label="G) Special token place-holder",
        )
        n_fit = gr.Dropdown(
            list(range(1, 5)),
            label="H) Degree of polynomial fit",
            type="value",
        )

    gr.Markdown(
        "I) Finally, add input text that includes at least one of the '`To-MASK`' tokens from (A) or (B) and one place-holder token from (G)."
    )
    with gr.Row():
        input_text = gr.Textbox(
            lines=2,
            label="I) Input text with a '`To-MASK`' and place-holder token",
        )

    gr.Markdown("## Outputs!")
    with gr.Row():
        btn = gr.Button("Hit submit to generate predictions!")
    with gr.Row():
        sample_text = gr.Textbox(
            type="text", label="Output text: Sample of text fed to model"
        )
    with gr.Row():
        female_fig = gr.Plot(type="auto")
        male_fig = gr.Plot(type="auto")
    with gr.Row():
        df = gr.Dataframe(
            show_label=True,
            overflow_row_behaviour="show_ends",
            label="Table of softmax probability for grouped predictions",
        )

    # Input components in the positional order of predict_masked_tokens'
    # parameters, which is also the order of the values returned by the
    # example functions.
    input_components = [
        model_name,
        own_model_name,
        group_a_tokens,
        group_b_tokens,
        x_axis,
        place_holder,
        to_normalize,
        n_fit,
        input_text,
    ]
    with gr.Row():
        # The example buttons only fill in the input fields.
        truck_1_gen.click(truck_1_fn, inputs=[], outputs=input_components)
        truck_2_gen.click(truck_2_fn, inputs=[], outputs=input_components)
        # The submit button runs inference and fills in the outputs.
        btn.click(
            predict_masked_tokens,
            inputs=input_components,
            outputs=[sample_text, female_fig, male_fig, df],
        )

demo.launch(debug=True)
| # %% | |