Spaces:
Running
on
Zero
Running
on
Zero
Provide the previous prompt as "History"
Browse files
app.py
CHANGED
|
@@ -26,7 +26,6 @@ If you duplicate this space, make sure you have access to [meta-llama/Llama-2-7b
|
|
| 26 |
because this model uses it as a tokenizer.
|
| 27 |
|
| 28 |
# Note: Use this model only for completing sentences and instruction following.
|
| 29 |
-
## While the user interface is a chatbot for convenience, this is an instruction tuned model not fine-tuned for chatbot tasks. As such, the model is not provided a chat history and will complete your text based on the last given prompt only.
|
| 30 |
"""
|
| 31 |
|
| 32 |
LICENSE = """
|
|
@@ -35,8 +34,6 @@ LICENSE = """
|
|
| 35 |
---
|
| 36 |
As a derivative work of [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) by Apple,
|
| 37 |
this demo is governed by the original [license](https://huggingface.co/apple/OpenELM-3B-Instruct/blob/main/LICENSE).
|
| 38 |
-
|
| 39 |
-
This demo Space was created by [Doron Adler](https://linktr.ee/Norod78)
|
| 40 |
"""
|
| 41 |
|
| 42 |
if not torch.cuda.is_available():
|
|
@@ -51,6 +48,7 @@ if torch.cuda.is_available():
|
|
| 51 |
if tokenizer.pad_token == None:
|
| 52 |
tokenizer.pad_token = tokenizer.eos_token
|
| 53 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
|
|
|
| 54 |
|
| 55 |
@spaces.GPU
|
| 56 |
def generate(
|
|
@@ -63,6 +61,13 @@ def generate(
|
|
| 63 |
repetition_penalty: float = 1.4,
|
| 64 |
) -> Iterator[str]:
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
input_ids = tokenizer([message], return_tensors="pt").input_ids
|
| 67 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
| 68 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
|
@@ -82,7 +87,7 @@ def generate(
|
|
| 82 |
pad_token_id = tokenizer.eos_token_id,
|
| 83 |
repetition_penalty=repetition_penalty,
|
| 84 |
no_repeat_ngram_size=5,
|
| 85 |
-
early_stopping=
|
| 86 |
)
|
| 87 |
t = Thread(target=model.generate, kwargs=generate_kwargs)
|
| 88 |
t.start()
|
|
|
|
| 26 |
because this model uses it as a tokenizer.
|
| 27 |
|
| 28 |
# Note: Use this model only for completing sentences and instruction following.
|
|
|
|
| 29 |
"""
|
| 30 |
|
| 31 |
LICENSE = """
|
|
|
|
| 34 |
---
|
| 35 |
As a derivative work of [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) by Apple,
|
| 36 |
this demo is governed by the original [license](https://huggingface.co/apple/OpenELM-3B-Instruct/blob/main/LICENSE).
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
|
| 39 |
if not torch.cuda.is_available():
|
|
|
|
| 48 |
if tokenizer.pad_token == None:
|
| 49 |
tokenizer.pad_token = tokenizer.eos_token
|
| 50 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
| 51 |
+
model.config.pad_token_id = tokenizer.eos_token_id
|
| 52 |
|
| 53 |
@spaces.GPU
|
| 54 |
def generate(
|
|
|
|
| 61 |
repetition_penalty: float = 1.4,
|
| 62 |
) -> Iterator[str]:
|
| 63 |
|
| 64 |
+
historical_text = ""
|
| 65 |
+
# Prepend the entire chat history to the message, separating messages with newlines
|
| 66 |
+
for user, assistant in chat_history:
|
| 67 |
+
historical_text += f"\n{user}\n{assistant}"
|
| 68 |
+
|
| 69 |
+
if len(historical_text) > 0:
|
| 70 |
+
message = historical_text + f"\n{message}"
|
| 71 |
input_ids = tokenizer([message], return_tensors="pt").input_ids
|
| 72 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
| 73 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
|
|
|
| 87 |
pad_token_id = tokenizer.eos_token_id,
|
| 88 |
repetition_penalty=repetition_penalty,
|
| 89 |
no_repeat_ngram_size=5,
|
| 90 |
+
early_stopping=False,
|
| 91 |
)
|
| 92 |
t = Thread(target=model.generate, kwargs=generate_kwargs)
|
| 93 |
t.start()
|