Spaces: Runtime error
Commit 99660e9
Parent(s): a7e983f

Upload folder using huggingface_hub

Browse files:
- app.py +97 -0
- requirements.txt +2 -0
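The commit message says the folder was pushed with the huggingface_hub client. For reference, a minimal sketch of such an upload; the repo id and local path here are illustrative assumptions, not read from this commit:

    from huggingface_hub import HfApi

    api = HfApi()
    # hypothetical names: replace with your own Space and local folder
    api.upload_folder(
        folder_path="./expertllama-demo",
        repo_id="your-username/expertllama-demo",
        repo_type="space",
    )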
app.py ADDED
@@ -0,0 +1,97 @@
+'''
+simple demo adapted from [gradio](https://gradio.app/creating-a-chatbot/).
+'''
+
+import gradio as gr
+import torch
+import transformers
+from tqdm import tqdm
+from transformers import LlamaTokenizer, LlamaForCausalLM
+
+
+def apply_delta(base_model_path, target_model_path, delta_path):
+    print(f"Loading the delta weights from {delta_path}")
+    delta_tokenizer = LlamaTokenizer.from_pretrained(delta_path, use_fast=False)
+    delta = LlamaForCausalLM.from_pretrained(
+        delta_path, low_cpu_mem_usage=True, torch_dtype=torch.float16
+    )
+
+    print(f"Loading the base model from {base_model_path}")
+    base_tokenizer = LlamaTokenizer.from_pretrained(base_model_path, use_fast=False)
+    base = LlamaForCausalLM.from_pretrained(
+        base_model_path, low_cpu_mem_usage=True, torch_dtype=torch.float16
+    )
+
+    # following the Alpaca training recipe, new tokens were added during
+    # fine-tuning, so the base vocabulary must be extended to match the delta
+    DEFAULT_PAD_TOKEN = "[PAD]"
+    DEFAULT_EOS_TOKEN = "</s>"
+    DEFAULT_BOS_TOKEN = "<s>"
+    DEFAULT_UNK_TOKEN = "<unk>"
+    special_tokens_dict = {
+        "pad_token": DEFAULT_PAD_TOKEN,
+        "eos_token": DEFAULT_EOS_TOKEN,
+        "bos_token": DEFAULT_BOS_TOKEN,
+        "unk_token": DEFAULT_UNK_TOKEN,
+    }
+    num_new_tokens = base_tokenizer.add_special_tokens(special_tokens_dict)
+    base.resize_token_embeddings(len(base_tokenizer))
+    input_embeddings = base.get_input_embeddings().weight.data
+    output_embeddings = base.get_output_embeddings().weight.data
+
+    # zero-initialize the new rows so that base + delta reproduces the
+    # fine-tuned embeddings exactly
+    input_embeddings[-num_new_tokens:] = 0
+    output_embeddings[-num_new_tokens:] = 0
+
+    print("Applying the delta")
+    target_weights = {}
+    for name, param in tqdm(base.state_dict().items(), desc="Applying delta"):
+        assert name in delta.state_dict()
+        param.data += delta.state_dict()[name]
+        target_weights[name] = param.data
+
+    print(f"Saving the target model to {target_model_path}")
+    base.load_state_dict(target_weights)
+    base.save_pretrained(target_model_path)
+    delta_tokenizer.save_pretrained(target_model_path)
+
+
+base_weights = 'decapoda-research/llama-7b-hf'
+target_weights = 'expertllama'  # local path for the merged model
+delta_weights = 'OFA-Sys/expertllama-7b-delta'
+apply_delta(base_weights, target_weights, delta_weights)
+
+tokenizer = transformers.LlamaTokenizer.from_pretrained(target_weights)
+model = transformers.LlamaForCausalLM.from_pretrained(target_weights, torch_dtype=torch.float16, low_cpu_mem_usage=True)
+# model.cuda()  # uncomment when running on a GPU
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+    clear = gr.Button("Clear")
+
+    def respond(message, chat_history):
+        # prompt wrapper; only single-turn is supported for now
+        prompt = f"### Human:\n{message}\n\n### Assistant:\n"
+
+        batch = tokenizer(
+            prompt,
+            return_tensors="pt",
+            add_special_tokens=False
+        )
+        # keep inputs on the same device as the model (CPU or GPU)
+        batch = {k: v.to(model.device) for k, v in batch.items()}
+        # do_sample=True is required for temperature to take effect
+        generated = model.generate(batch["input_ids"], max_length=1024, do_sample=True, temperature=0.8)
+        # drop the trailing EOS token and strip the prompt prefix
+        bot_message = tokenizer.decode(generated[0][:-1]).split("### Assistant:\n", 1)[1]
+
+        chat_history.append((message, bot_message))
+        return "", chat_history
+
+    msg.submit(respond, [msg, chatbot], [msg, chatbot])
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+demo.launch()
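Two practical notes on the script above. First, as written it re-downloads both checkpoints and re-applies the delta on every Space restart, which is slow and memory-hungry and is a plausible contributor to the "Runtime error" status. A minimal guard, reusing the variable names from the script (the check itself is an assumption, not part of the commit):

    import os

    # merge only once; later restarts reuse the saved weights on disk
    if not os.path.exists(target_weights):
        apply_delta(base_weights, target_weights, delta_weights)

Second, the prompt wrapper is single-turn by design; extending it to multi-turn would amount to concatenating the (human, assistant) pairs already stored in chat_history ahead of the new message before tokenizing.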
requirements.txt ADDED
@@ -0,0 +1,2 @@
+torch
+transformers
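Both dependencies are unpinned, so a breaking release of either library can take the Space down on its next rebuild. A hedged example of pinning; these exact versions are illustrative assumptions from the era of this code, not taken from the commit:

    torch==2.0.1
    transformers==4.28.1

Note that gradio is absent because the Space runtime provides it via the SDK setting, and tqdm arrives transitively through transformers. It would also be worth listing sentencepiece, which the slow LlamaTokenizer requires, and accelerate, which low_cpu_mem_usage=True depends on; either missing package is another candidate cause of the runtime error.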