Update app.py
Browse files
app.py
CHANGED
|
@@ -63,8 +63,13 @@ tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
|
|
| 63 |
# temperature=0.4,
|
| 64 |
# stream = True
|
| 65 |
# eos_token_id=terminators
|
| 66 |
-
)
|
| 67 |
# send additional parameters to model for generation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
#model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
|
| 70 |
model = Llama.from_pretrained(
|
|
@@ -74,6 +79,7 @@ model = Llama.from_pretrained(
|
|
| 74 |
temperature=0.75,
|
| 75 |
max_tokens=500,
|
| 76 |
top_p=0.95,
|
|
|
|
| 77 |
# callback_manager=callback_manager,
|
| 78 |
# verbose=True, # Verbose is required to pass to the callback manager
|
| 79 |
)
|
|
|
|
| 63 |
# temperature=0.4,
|
| 64 |
# stream = True
|
| 65 |
# eos_token_id=terminators
|
| 66 |
+
#)
|
| 67 |
# send additional parameters to model for generation
|
| 68 |
+
# Token ids at which generation should stop.
# NOTE(review): "<|eot_id|>" is looked up in the tokenizer vocabulary here;
# confirm the token actually exists in this vocab — convert_tokens_to_ids
# maps unknown token strings to the unk id. TODO confirm against the model.
eot_token_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [tokenizer.eos_token_id, eot_token_id]
|
| 72 |
+
# indicates the end of a sequence
|
| 73 |
|
| 74 |
#model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
|
| 75 |
model = Llama.from_pretrained(
|
|
|
|
| 79 |
temperature=0.75,
|
| 80 |
max_tokens=500,
|
| 81 |
top_p=0.95,
|
| 82 |
+
eos_token_id=terminators
|
| 83 |
# callback_manager=callback_manager,
|
| 84 |
# verbose=True, # Verbose is required to pass to the callback manager
|
| 85 |
)
|