| import os | |
| import torch | |
# Dotted attribute-path templates for a model's transformer blocks.
# ``{k}`` is a str.format placeholder (presumably the layer index; confirm
# against the code that formats these templates).
llama_layers_format = "model.layers.{k}"  # referenced by the LLaMA-2 and Mistral entries
gpt_layers_format = "transformer.h.{k}"   # referenced by the GPT-J and GPT-2 entries
# Example-prompt datasets. Each entry is a dict with:
#   name     - display name of the dataset
#   hf_repo  - dataset repository identifier
#   text_col - name of the column holding the prompt text
#   filter   - (optional) predicate over a single row; only 'Factual Recall'
#              defines one, keeping rows whose 'label' equals 1
dataset_info = [
    dict(
        name='Common Sense',
        hf_repo='tau/commonsense_qa',
        text_col='question',
    ),
    dict(
        name='Factual Recall',
        hf_repo='azhx/counterfact-filtered-gptj6b',
        text_col='subject+predicate',
        filter=lambda x: x['label'] == 1,
    ),
    # Disabled entry, kept for reference:
    # dict(name='Physical Understanding', hf_repo='piqa', text_col='goal'),
    dict(
        name='Social Reasoning',
        hf_repo='ProlificAI/social-reasoning-rlhf',
        text_col='question',
    ),
    dict(
        name='Open Domain Question Answering',
        hf_repo='nq_open',
        text_col='question',
    ),
]
# Hugging Face access token, read once from the ``hf_token`` environment
# variable. ``.get`` is used instead of indexing so that importing this module
# no longer raises KeyError when the variable is unset; in that case the token
# is None. Entries that do not pass a token (GPT-J, GPT-2, Mistral) are then
# unaffected. NOTE(review): entries that do pass ``token`` will presumably fail
# at load time instead of import time when it is None - confirm against the
# code that consumes ``token``.
_HF_TOKEN = os.environ.get('hf_token')

# Registry of selectable models, keyed by display name. Each value is a dict of
# per-model settings:
#   model_path                     - model repository identifier
#   original_prompt_template       - str.format template with a ``{prompt}`` slot
#   interpretation_prompt_template - str.format template with a ``{prompt}`` slot
#                                    and a literal ``[X]`` marker (presumably
#                                    replaced by the consumer; confirm)
#   layers_format                  - one of the ``*_layers_format`` templates
# Any further keys (token, torch_dtype, device_map, wait_with_hidden_states)
# are extra per-model options; their handling is defined by the consumer of
# this dict and is not visible here.
model_info = {
    'LLAMA2-7B': dict(
        model_path='meta-llama/Llama-2-7b-chat-hf',
        token=_HF_TOKEN,
        original_prompt_template='<s>{prompt}',
        interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
        # load_in_8bit=True,
        # dont_cuda=True,
        layers_format=llama_layers_format,
    ),
    'LLAMA2-13B': dict(
        model_path='meta-llama/Llama-2-13b-chat-hf',
        token=_HF_TOKEN,
        torch_dtype=torch.float16,
        wait_with_hidden_states=True,
        # device_map='auto', max_memory={0: "15GB", 1: "30GB"}, dont_cuda=True,  # load_in_8bit=True,
        original_prompt_template='<s>{prompt}',
        interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
        layers_format=llama_layers_format,
    ),
    'GPT-J 6B': dict(
        model_path='EleutherAI/gpt-j-6b',
        original_prompt_template='{prompt}',
        interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
        layers_format=gpt_layers_format,
    ),
    'Mistral-7B Instruct': dict(
        model_path='mistralai/Mistral-7B-Instruct-v0.2',
        device_map='cpu',
        original_prompt_template='<s>{prompt}',
        interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
        layers_format=llama_layers_format,
    ),
    'GPT-2 Small': dict(
        model_path='gpt2',
        original_prompt_template='{prompt}',
        interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
        layers_format=gpt_layers_format,
    ),
    # --- Disabled entries, kept for reference -------------------------------
    # 'Mixtral 8x7B Instruct (Experimental)': dict(model_path='TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ',
    #     token=_HF_TOKEN, wait_with_hidden_states=True,
    #     original_prompt_template='<s>{prompt}',
    #     interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
    #     layers_format=llama_layers_format
    # ),
    # 'Wizard Vicuna 30B Uncensored (Experimental)': dict(model_path='TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ',
    #     token=_HF_TOKEN,
    #     wait_with_hidden_states=True, dont_cuda=True, device_map='cuda',
    #     original_prompt_template='<s>USER: {prompt}',
    #     interpretation_prompt_template='<s>USER: [X] ASSISTANT: {prompt}',
    #     layers_format=llama_layers_format
    # ),
    # 'GPT-2 Medium': dict(model_path='gpt2-medium', original_prompt_template='{prompt}',
    #     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
    #     layers_format=gpt_layers_format),
    # 'GPT-2 Large': dict(model_path='gpt2-large', original_prompt_template='{prompt}',
    #     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
    #     layers_format=gpt_layers_format),
    # 'GPT-2 XL': dict(model_path='gpt2-xl', original_prompt_template='{prompt}',
    #     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
    #     layers_format=gpt_layers_format),
    # 'CodeLLAMA 70B Instruct (Experimental)': dict(model_path='TheBloke/CodeLlama-70B-Instruct-GPTQ',
    #     token=_HF_TOKEN,
    #     wait_with_hidden_states=True, dont_cuda=True, device_map='cuda',  # disable_exllama=True,
    #     original_prompt_template='<s>{prompt}',
    #     interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
    #     layers_format=llama_layers_format
    # ),
    # 'Gemma-2B': dict(model_path='google/gemma-2b', device_map='cpu', token=_HF_TOKEN,
    #     original_prompt_template='<bos>{prompt}',
    #     interpretation_prompt_template='<bos>User: [X]\n\nAnswer: {prompt}',
    # ),
    # 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF': dict(model_file='mistral-7b-instruct-v0.2.Q5_K_S.gguf',
    #     tokenizer='mistralai/Mistral-7B-Instruct-v0.2',
    #     model_type='llama', hf=True, ctransformers=True,
    #     original_prompt_template='<s>[INST] {prompt} [/INST]',
    #     interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
    # )
}