Update app.py
Browse files
app.py
CHANGED
|
@@ -17,12 +17,12 @@ from transformers import (
|
|
| 17 |
)
|
| 18 |
|
| 19 |
|
| 20 |
-
model_name = "
|
| 21 |
max_new_tokens = 1536
|
| 22 |
|
| 23 |
# # small testing model:
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
auth_token = os.getenv("HF_TOKEN", None)
|
| 28 |
|
|
@@ -30,24 +30,20 @@ print(f"Starting to load the model {model_name} into memory")
|
|
| 30 |
|
| 31 |
m = AutoModelForCausalLM.from_pretrained(
|
| 32 |
model_name,
|
|
|
|
| 33 |
torch_dtype=torch.bfloat16,
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_auth_token=auth_token)
|
| 39 |
|
| 40 |
-
stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])
|
| 41 |
|
| 42 |
print(f"Successfully loaded the model {model_name} into memory")
|
| 43 |
|
| 44 |
|
| 45 |
-
start_message = """
|
| 46 |
-
|
| 47 |
-
- You answer questions.
|
| 48 |
-
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
|
| 49 |
-
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
|
| 50 |
-
"""
|
| 51 |
|
| 52 |
|
| 53 |
class StopOnTokens(StoppingCriteria):
|
|
@@ -177,13 +173,7 @@ with gr.Blocks(
|
|
| 177 |
) as demo:
|
| 178 |
conversation_id = gr.State(get_uuid)
|
| 179 |
gr.Markdown(
|
| 180 |
-
"""<h1><center>
|
| 181 |
-
|
| 182 |
-
This demo is of [MPT-7B-Chat](https://huggingface.co/mosaicml/mpt-7b-chat). It is based on [MPT-7B](https://huggingface.co/mosaicml/mpt-7b) fine-tuned with approximately [171,000 conversation samples from this dataset](https://huggingface.co/datasets/sam-mosaic/vicuna_alpaca_hc3_chatml) and another [217,000 from this dataset](https://huggingface.co/datasets/sam-mosaic/hhrlhf_evol_chatml).
|
| 183 |
-
|
| 184 |
-
If you're interested in [training](https://www.mosaicml.com/training) and [deploying](https://www.mosaicml.com/inference) your own MPT or LLMs, [sign up](https://forms.mosaicml.com/demo?utm_source=huggingface&utm_medium=referral&utm_campaign=mpt-7b) for MosaicML platform.
|
| 185 |
-
|
| 186 |
-
This is running on a smaller, shared GPU, so it may take a few seconds to respond. If you want to run it on your own GPU, you can [download the model from HuggingFace](https://huggingface.co/mosaicml/mpt-7b-chat) and run it locally. Or [Duplicate the Space](https://huggingface.co/spaces/mosaicml/mpt-7b-chat?duplicate=true) to skip the queue and run in a private space.
|
| 187 |
"""
|
| 188 |
)
|
| 189 |
chatbot = gr.Chatbot().style(height=500)
|
|
@@ -251,8 +241,8 @@ with gr.Blocks(
|
|
| 251 |
)
|
| 252 |
with gr.Row():
|
| 253 |
gr.Markdown(
|
| 254 |
-
"Disclaimer:
|
| 255 |
-
"factually accurate information.
|
| 256 |
"have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
|
| 257 |
"biased, or otherwise offensive outputs.",
|
| 258 |
elem_classes=["disclaimer"],
|
|
|
|
| 17 |
)
|
| 18 |
|
| 19 |
|
| 20 |
+
model_name = "timdettmers/guanaco-33b-merged"
|
| 21 |
max_new_tokens = 1536
|
| 22 |
|
| 23 |
# # small testing model:
|
| 24 |
+
# NOTE(review): this reassignment replaces the "timdettmers/guanaco-33b-merged"
# value assigned to model_name earlier, so the small testing model is what gets
# loaded. The tokenizer is loaded from "decapoda-research/llama-7b-hf", not from
# model_name -- confirm this testing override is meant to stay enabled.
model_name = "gpt2"
|
| 25 |
+
max_new_tokens = 128
|
| 26 |
|
| 27 |
auth_token = os.getenv("HF_TOKEN", None)
|
| 28 |
|
|
|
|
| 30 |
|
| 31 |
m = AutoModelForCausalLM.from_pretrained(
|
| 32 |
model_name,
|
| 33 |
+
load_in_8bit=True,
|
| 34 |
torch_dtype=torch.bfloat16,
|
| 35 |
+
device_map="auto"
|
| 36 |
+
)
|
| 37 |
+
tok = AutoTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
|
| 38 |
+
tok.bos_token_id = 1
|
|
|
|
| 39 |
|
| 40 |
+
# stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])
|
| 41 |
|
| 42 |
print(f"Successfully loaded the model {model_name} into memory")
|
| 43 |
|
| 44 |
|
| 45 |
+
start_message = """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."""
|
| 46 |
+
# Prompt template for a single turn. This is deliberately a plain string, not
# an f-string: an f-string would be evaluated at import time, when no
# `description` or `user_query` values exist yet. Fill it in per request with
# prompt.format(description=..., user_query=...).
prompt = "{description} ### Human: {user_query} ### Assistant:"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
class StopOnTokens(StoppingCriteria):
|
|
|
|
| 173 |
) as demo:
|
| 174 |
conversation_id = gr.State(get_uuid)
|
| 175 |
gr.Markdown(
|
| 176 |
+
"""<h1><center>Guanaco-33b playground</center></h1>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
"""
|
| 178 |
)
|
| 179 |
chatbot = gr.Chatbot().style(height=500)
|
|
|
|
| 241 |
)
|
| 242 |
with gr.Row():
|
| 243 |
gr.Markdown(
|
| 244 |
+
"Disclaimer: The model can produce factually incorrect output, and should not be relied on to produce "
|
| 245 |
+
"factually accurate information. The model was trained on various public datasets; while great efforts "
|
| 246 |
"have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
|
| 247 |
"biased, or otherwise offensive outputs.",
|
| 248 |
elem_classes=["disclaimer"],
|