Commit 9d80551 · 1 parent: 10d46ff
J-Antoine ZAGATO committed: Added toxicity comparison & flagging + refactoring
app.py CHANGED
@@ -1,3 +1,4 @@
+import os
 import torch
 
 import numpy as np
@@ -9,6 +10,7 @@ from datasets import load_dataset
 from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM
 from transformers import BloomTokenizerFast, BloomForCausalLM
 
+HF_AUTH_TOKEN = os.environ.get('hf_token' or True)
 DATASET = "allenai/real-toxicity-prompts"
 
 CHECKPOINTS = {
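A note on the new HF_AUTH_TOKEN line above: Python evaluates 'hf_token' or True first, and since a non-empty string is truthy that expression is always just 'hf_token'. The `or True` is therefore a no-op, and the lookup silently returns None when the variable is unset. A minimal fail-fast alternative is sketched below; the error-raising behaviour is my assumption about the intent, not what the commit does.

import os

# Equivalent lookup, but fail fast instead of passing None to the dataset saver later on.
HF_AUTH_TOKEN = os.environ.get('hf_token')
if HF_AUTH_TOKEN is None:
    raise RuntimeError("Expected a Hugging Face write token in the 'hf_token' environment variable.")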
@@ -140,19 +142,21 @@ def show_dataset(dataset):
 def update_dropdown(prompts):
     return gr.update(choices=random_sample(prompts))
 
-def show_text(text):
-    new_text = "lol " + text
-    return gr.update(visible = True, value=new_text)
-
 def process_user_input(model, input):
     warning = 'Please enter a valid prompt.'
     if input == None:
-
-
+        generated = warning
+    else:
+        generated = generate(model, input)
 
     return (
         gr.update(visible = True, value=generated),
-        gr.update(visible=True)
+        gr.update(visible=True),
+        gr.update(visible=True),
+        gr.update(visible=True),
+        gr.update(visible=True),
+        input,
+        generated
     )
 
 def pass_to_textbox(input):
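The rewritten process_user_input now returns seven values because the generate_button.click wiring further down lists seven output components: in Gradio Blocks, a handler's return values are mapped positionally onto `outputs`, and each value may be either a plain value (here `input` and `generated`, stored into two gr.Variable states) or a gr.update(...) that tweaks a component's properties (here revealing four hidden buttons). A toy sketch of the pattern, with illustrative component names and assuming Gradio 3.x; as a style aside, `input is None` would be the idiomatic test, and `input` shadows a builtin.

import gradio as gr

def handler(prompt):
    # One return value per entry in `outputs`, in order.
    text = prompt if prompt is not None else "Please enter a valid prompt."
    return gr.update(visible=True, value=text), gr.update(visible=True)

with gr.Blocks() as sketch:
    prompt_box = gr.Textbox()
    result_box = gr.Textbox(visible=False)
    analyse_button = gr.Button("Analyse", visible=False)
    run_button = gr.Button("Run")

    # First value fills result_box, second reveals analyse_button.
    run_button.click(fn=handler, inputs=prompt_box, outputs=[result_box, analyse_button])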
@@ -161,21 +165,52 @@ def pass_to_textbox(input):
 def run_detoxify(text):
     results = Detoxify('original').predict(text)
     json_ready_results = {cat:float(score) for (cat,score) in results.items()}
-
+    return json_ready_results
+
+def compute_toxi_output(output_text):
+    scores = run_detoxify(output_text)
+    return (
+        gr.update(value=scores, visible=True),
+        gr.update(visible=True)
+    )
 
+def compute_change(input, output):
+    change_percent = round(((float(output)-input)/input)*100, 2)
+    return change_percent
 
+def compare_toxi_scores(input_text, output_scores):
+    input_scores = run_detoxify(input_text)
+    json_ready_results = {cat:float(score) for (cat,score) in input_scores.items()}
+
+    compare_scores = {
+        cat:compute_change(json_ready_results[cat], output_scores[cat])
+        for cat in json_ready_results
+        for cat in output_scores
+    }
+
+    return (
+        gr.update(value=json_ready_results, visible=True),
+        gr.update(value=compare_scores, visible=True)
+    )
 
 with gr.Blocks() as demo:
     gr.Markdown("# Project Interface proposal")
-
+    gr.Markdown("### Write description and user instructions here")
     dataset = gr.Variable(value=DATASET)
     prompts_var = gr.Variable(value=None)
+    input_var = gr.Variable(label="Input Prompt", value=None)
+    output_var = gr.Variable(label="Output",value=None)
+    flagging_callback = gr.HuggingFaceDatasetSaver(hf_token = HF_AUTH_TOKEN,
+                                                   dataset_name = "fsdlredteam/flagged",
+                                                   organization = "fsdlredteam",
+                                                   private = True )
 
     with gr.Row(equal_height=True):
-
+
+        with gr.Column(): # input & prompts dataset exploration
             gr.Markdown("### 1. Select a prompt")
 
-            input_text = gr.Textbox(label="Write your prompt below.", interactive=True)
+            input_text = gr.Textbox(label="Write your prompt below.", interactive=True, lines=4)
             gr.Markdown("— or —")
             inspo_button = gr.Button('Click here if you need some inspiration')
 
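Two things stand out in the new toxicity helpers. First, Detoxify('original') loads a full BERT-sized checkpoint, and run_detoxify re-instantiates it on every button press; caching one instance avoids the repeated load. Second, the doubled `for cat in ...` clauses in compare_toxi_scores iterate the cross product of both key sets, with the inner clause rebinding `cat`; it happens to work because both dicts carry the same Detoxify categories, but it repeats the work len(input) times and would raise KeyError on mismatched keys. compute_change also divides by the input score, which can be vanishingly small or zero for benign text. A tightened sketch follows; DETOX_MODEL and the zero-guard are my additions, not part of the commit.

import gradio as gr
from detoxify import Detoxify

DETOX_MODEL = Detoxify('original')   # load the checkpoint once, not on every click

def run_detoxify(text):
    # Detoxify returns numpy float32 scores; cast to float so gr.JSON can serialise them.
    return {cat: float(score) for cat, score in DETOX_MODEL.predict(text).items()}

def compute_change(before, after):
    # Guard the division: near-benign text can score (almost) zero.
    if before == 0:
        return float('inf') if after else 0.0
    return round((after - before) / before * 100, 2)

def compare_toxi_scores(input_text, output_scores):
    input_scores = run_detoxify(input_text)
    compare_scores = {
        cat: compute_change(input_scores[cat], output_scores[cat])
        for cat in input_scores.keys() & output_scores.keys()   # one pass over shared keys
    }
    return (
        gr.update(value=input_scores, visible=True),
        gr.update(value=compare_scores, visible=True),
    )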
@@ -184,11 +219,8 @@ with gr.Blocks() as demo:
 
         randomize_button = gr.Button('Show another subset', visible=False)
 
-    inspo_button.click(fn=show_dataset, inputs=dataset, outputs=[prompts_drop, randomize_button, prompts_var])
-    randomize_button.click(fn=update_dropdown, inputs=prompts_var, outputs=prompts_drop)
 
-    with gr.Column():
-
+        with gr.Column(): # Model choice & output
             gr.Markdown("### 2. Evaluate output")
 
             generate_button = gr.Button('Pick a model below and submit your prompt')
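The "refactoring" half of this commit is visible here: the .click bindings are pulled out of the layout and regrouped at the bottom of the `with gr.Blocks()` block. In Blocks, a listener only needs to be declared somewhere inside the context and may reference any component created earlier, so the move is purely organisational. A toy illustration, assuming Gradio 3.x:

import gradio as gr

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    greeting = gr.Textbox(label="Greeting")
    greet_button = gr.Button("Greet")

    # All wiring collected after the layout, as in this commit.
    greet_button.click(fn=lambda n: f"Hello {n}!", inputs=name, outputs=greeting)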
@@ -199,16 +231,64 @@ with gr.Blocks() as demo:
             model_radio.change(fn=lambda value: value, inputs=model_radio, outputs=model_choice)
 
             output_text = gr.Textbox(label="Generated prompt.", visible=False)
-
-            toxi_button = gr.Button("Run a toxicity analysis of the model's output", visible=False)
-            toxi_scores = gr.JSON(visible=False)
 
+    with gr.Row(equal_height=True): # Flagging
+        flagging_callback.setup([input_text, output_text, model_radio], "flagged_data_points")
+
+        toxi_flag_button = gr.Button("Report toxic output here", visible=False)
+        unexpected_flag_button = gr.Button("Report incorrect output here", visible=False)
+        other_flag_button = gr.Button("Report other inappropriate output here", visible=False)
+
+    with gr.Row(equal_height=True): # Toxicity buttons
+        toxi_button = gr.Button("Run a toxicity analysis of the model's output", visible=False)
+        toxi_button_compare = gr.Button("Compare toxicity on input and output", visible=False)
+
+    with gr.Row(equal_height=True): # Toxicity scores
+        toxi_scores_input = gr.JSON(label = "Detoxify classification of your input", visible=False)
+        toxi_scores_output = gr.JSON(label="Detoxify classification of the model's output", visible=False)
+        toxi_scores_compare = gr.JSON(label = "Percentage change between Input and Output", visible=False)
 
-    generate_button.click(fn=process_user_input,
-                          inputs=[model_choice, input_text],
-                          outputs=[output_text,toxi_button])
-
+    inspo_button.click(fn=show_dataset,
+                       inputs=dataset,
+                       outputs=[prompts_drop, randomize_button, prompts_var])
+
+    randomize_button.click(fn=update_dropdown,
+                           inputs=prompts_var,
+                           outputs=prompts_drop)
+
+    generate_button.click(fn=process_user_input,
+                          inputs=[model_choice, input_text],
+                          outputs=[output_text,
+                                   toxi_button,
+                                   toxi_flag_button,
+                                   unexpected_flag_button,
+                                   other_flag_button,
+                                   input_var,
+                                   output_var])
+
+    toxi_button.click(fn=compute_toxi_output,
+                      inputs=output_text,
+                      outputs=[toxi_scores_output, toxi_button_compare])
+
+    toxi_button_compare.click(fn=compare_toxi_scores,
+                              inputs=[input_text, toxi_scores_output],
+                              outputs=[toxi_scores_input, toxi_scores_compare])
+
+    toxi_flag_button.click(lambda *args: flagging_callback.flag(args, flag_option = "toxic"),
+                           inputs=[input_text, output_text, model_radio],
+                           outputs=None,
+                           preprocess=False)
+
+    unexpected_flag_button.click(lambda *args: flagging_callback.flag(args, flag_option = "unexpected"),
+                                 inputs=[input_text, output_text, model_radio],
+                                 outputs=None,
+                                 preprocess=False)
+
+    other_flag_button.click(lambda *args: flagging_callback.flag(args, flag_option = "other"),
+                            inputs=[input_text, output_text, model_radio],
+                            outputs=None,
+                            preprocess=False)
 
 #demo.launch(debug=True)
 if __name__ == "__main__":
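On the flagging wiring: gr.HuggingFaceDatasetSaver is configured once via setup() with the components whose values should be logged, and each flag() call then appends one row (the component values plus the flag_option string) to the fsdlredteam/flagged dataset repo; preprocess=False hands the raw component values straight to the lambda. A condensed sketch of that lifecycle with toy component names, assuming Gradio 3.x and the HF_AUTH_TOKEN constant defined at the top of app.py:

import gradio as gr

callback = gr.HuggingFaceDatasetSaver(hf_token=HF_AUTH_TOKEN,   # write token from the environment
                                      dataset_name="fsdlredteam/flagged",
                                      organization="fsdlredteam",
                                      private=True)

with gr.Blocks() as demo:
    prompt, completion = gr.Textbox(), gr.Textbox()
    flag_button = gr.Button("Report toxic output here")

    # Creates (or resumes) the log inside the dataset repo.
    callback.setup([prompt, completion], "flagged_data_points")

    flag_button.click(lambda *args: callback.flag(args, flag_option="toxic"),
                      inputs=[prompt, completion],
                      outputs=None,
                      preprocess=False)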