Spaces:

Norod78
/

OpenELM_3B_Demo

Running on Zero

App Files Files Community

Norod78 committed on May 7, 2024

Commit

811b009

verified ·

1 Parent(s): bc4ac2d

Provide the previous prompt as "History"

Browse files

Files changed (1) hide show

app.py +9 -4

app.py CHANGED Viewed

@@ -26,7 +26,6 @@ If you duplicate this space, make sure you have access to [meta-llama/Llama-2-7b
 because this model uses it as a tokenizer.
 # Note: Use this model for only for completing sentences and instruction following.
-## While the user interface is a chatbot for convenience, this is an instruction tuned model not fine-tuned for chatbot tasks. As such, the model is not provided a chat history and will complete your text based on the last given prompt only.
 """
 LICENSE = """
@@ -35,8 +34,6 @@ LICENSE = """
 ---
 As a derivative work of [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) by Apple,
 this demo is governed by the original [license](https://huggingface.co/apple/OpenELM-3B-Instruct/blob/main/LICENSE).
-This demo Space was created by [Doron Adler](https://linktr.ee/Norod78)
 """
 if not torch.cuda.is_available():
@@ -51,6 +48,7 @@ if torch.cuda.is_available():
     if tokenizer.pad_token == None:
         tokenizer.pad_token = tokenizer.eos_token
         tokenizer.pad_token_id = tokenizer.eos_token_id
 @spaces.GPU
 def generate(
@@ -63,6 +61,13 @@ def generate(
     repetition_penalty: float = 1.4,
 ) -> Iterator[str]:
     input_ids = tokenizer([message], return_tensors="pt").input_ids
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -82,7 +87,7 @@ def generate(
         pad_token_id = tokenizer.eos_token_id,
         repetition_penalty=repetition_penalty,
         no_repeat_ngram_size=5,
-        early_stopping=True,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()

 because this model uses it as a tokenizer.
 # Note: Use this model for only for completing sentences and instruction following.
 """
 LICENSE = """
 ---
 As a derivative work of [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) by Apple,
 this demo is governed by the original [license](https://huggingface.co/apple/OpenELM-3B-Instruct/blob/main/LICENSE).
 """
 if not torch.cuda.is_available():
     if tokenizer.pad_token == None:
         tokenizer.pad_token = tokenizer.eos_token
         tokenizer.pad_token_id = tokenizer.eos_token_id
+        model.config.pad_token_id = tokenizer.eos_token_id
 @spaces.GPU
 def generate(
     repetition_penalty: float = 1.4,
 ) -> Iterator[str]:
+    historical_text = ""
+    #Prepend the entire chat history to the message with new lines between each message
+    for user, assistant in chat_history:
+        historical_text += f"\n{user}\n{assistant}"
+    if len(historical_text) > 0:
+        message = historical_text + f"\n{message}"
     input_ids = tokenizer([message], return_tensors="pt").input_ids
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         pad_token_id = tokenizer.eos_token_id,
         repetition_penalty=repetition_penalty,
         no_repeat_ngram_size=5,
+        early_stopping=False,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()