Spaces:

tetrisd
/

biasprobe

Runtime error

App Files Files Community

tetrisd committed on Nov 30, 2023

Commit

8ad9dbd

1 Parent(s): 306e122

Upload 4 files

Browse files

Files changed (4) hide show

app.py +113 -0
probe.pt +3 -0
requirements.txt +3 -0
scrollbar.css +46 -0

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+from threading import Lock
+import argparse
+import numpy as np
+from matplotlib import pyplot as plt
+import gradio as gr
+import torch
+import pandas as pd
+from biasprobe import BinaryProbe, PairwiseExtractionRunner, SimplePairPromptBuilder, ProbeConfig
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--seed', '-s', type=int, default=0, help="the random seed")
+    parser.add_argument('--port', '-p', type=int, default=8080, help="the port to launch the demo")
+    parser.add_argument('--no-cuda', action='store_true', help="Use CPUs instead of GPUs")
+    args = parser.parse_args()
+    return args
+def main():
+    args = get_args()
+    plt.switch_backend('agg')
+    dmap = 'auto'
+    mdict = {0: '24GIB'}
+    config = ProbeConfig.create_for_model('mistralai/Mistral-7B-Instruct-v0.1')
+    probe = BinaryProbe(config).cuda()
+    probe.load_state_dict(torch.load('probe.pt'))
+    runner = PairwiseExtractionRunner.from_pretrained('mistralai/Mistral-7B-Instruct-v0.1', optimize=True, max_memory=mdict, device_map=dmap, low_cpu_mem_usage=True)
+    device = "cpu" if args.no_cuda else "cuda"
+    lock = Lock()
+    @torch.no_grad()
+    def run_extraction(prompt):
+        builder = SimplePairPromptBuilder(criterion='more positive')
+        lst = [x.strip() for x in prompt.lower()[:300].split(',')][:100]
+        exp = runner.run_extraction(lst, lst, layers=[15], num_repeat=100, builder=builder, parallel=False, run_inference=True, debug=True, max_new_tokens=2)
+        test_ds = exp.make_dataset(15)
+        import torch
+        raw_scores = []
+        preds_list = []
+        hs = []
+        for idx, (tensor, labels) in enumerate(test_ds):
+            with torch.no_grad():
+                labels = labels - 1  # 1-indexed
+                if tensor.shape[0] != 2:
+                    continue
+                h = tensor[1] - tensor[0]
+                hs.append(h)
+                try:
+                    x = probe(tensor.unsqueeze(0).cuda().float()).squeeze()
+                except IndexError:
+                    continue
+                pred = [0, 1] if x.item() > 0 else [1, 0]
+                pred = np.array(pred)
+            if test_ds.original_examples is not None:
+                items = [x.content for x in test_ds.original_examples[idx].hits]
+                preds_list.append(np.array(items, dtype=object)[labels][pred].tolist())
+            raw_scores.append(x.item())
+        df = pd.DataFrame({'Win Rate': np.array(raw_scores) > 0, 'Word': [x[0] for x in preds_list]})
+        win_df = df.groupby('Word').mean('Win Rate')
+        win_df = win_df.reset_index().sort_values('Win Rate')
+        win_df['Win Rate'] = [str(x) + '%' for x in (win_df['Win Rate'] * 100).round(2).tolist()]
+        return win_df
+    with gr.Blocks(css='scrollbar.css') as demo:
+        md = '''# BiasProbe: Revealing Preference Biases in Language Model Representations
+        What do llamas really "think"? Type some words below to see how Mistral-7B-Instruct associates them with
+        positive and negative emotions. Higher win rates indicate that the word is more likely to be associated with
+        positive emotions than other words in the list.
+        Check out our paper, [What Do Llamas Really Think? Revealing Preference Biases in Language Model Representations](http://arxiv.org/abs/2210.04885).
+        See our [codebase](https://github.com/castorini/biasprobe) on GitHub.
+        '''
+        gr.Markdown(md)
+        with gr.Row():
+            with gr.Column():
+                text = gr.Textbox(label='Words', value='Republican, democrat, libertarian, authoritarian')
+                submit_btn = gr.Button('Submit', elem_id='submit-btn')
+            output = gr.DataFrame(pd.DataFrame({'Word': ['authoritarian', 'republican', 'democrat', 'libertarian'],
+                                                'Win Rate': ['44.44%', '81.82%', '100%', '100%']}))
+            submit_btn.click(
+                fn=run_extraction,
+                inputs=[text],
+                outputs=[output])
+    while True:
+        try:
+            demo.launch(server_name='0.0.0.0')
+        except OSError:
+            gr.close_all()
+        except KeyboardInterrupt:
+            gr.close_all()
+            break
+if __name__ == '__main__':
+    main()

probe.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc369595d41f7a7339d4bd84790c7e117207087eb00b90762848eddcfb7a6c91
+size 17659

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==3.36.1
+biasprobe
+flash-attn

scrollbar.css ADDED Viewed

	@@ -0,0 +1,46 @@

+/* Scrollbar styling for elements with class "output-html".
+   NOTE(review): app.py in this commit does not assign this class itself;
+   confirm that the rendered page actually produces it. */
+/* Scroll horizontally only when the content overflows. */
+.output-html {
+    overflow-x: auto;
+}
+/* The ::-webkit-scrollbar pseudo-elements below are WebKit-specific. */
+.output-html::-webkit-scrollbar {
+    -webkit-appearance: none;
+}
+/* Zero width: the vertical scrollbar takes up no space. */
+.output-html::-webkit-scrollbar:vertical {
+    width: 0px;
+}
+/* The horizontal scrollbar is 11px tall. */
+.output-html::-webkit-scrollbar:horizontal {
+    height: 11px;
+}
+/* Thumb: rounded, half-transparent black with a white border. */
+.output-html::-webkit-scrollbar-thumb {
+    border-radius: 8px;
+    border: 2px solid white;
+    background-color: rgba(0, 0, 0, .5);
+}
+/* Track: white with rounded ends. */
+.output-html::-webkit-scrollbar-track {
+    background-color: #fff;
+    border-radius: 8px;
+}
+/* Minimum height for elements with class "spans".
+   NOTE(review): not referenced by app.py in this commit; confirm it is used. */
+.spans {
+    min-height: 75px;
+}
+/* Center every svg element horizontally as a block. */
+svg {
+    margin: auto;
+    display: block;
+}
+/* Matches elem_id='submit-btn' on the Submit button in app.py. */
+#submit-btn {
+    z-index: 999;
+}
+/* NOTE(review): no element with id "viz" is created in app.py in this commit.
+   This rule declares no `position`, so `top` only applies if the element is
+   positioned elsewhere; confirm. */
+#viz {
+    width: 100%;
+    top: -30px;
+    object-fit: scale-down;
+    object-position: 0 100%;
+}