Update app.py
Browse files
app.py
CHANGED
|
@@ -54,20 +54,20 @@ llm_model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
|
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
|
| 55 |
#initiate model and tokenizer
|
| 56 |
|
| 57 |
-
# Generation settings for the TinyLlama GGUF checkpoint.
# NOTE(review): AutoConfig.from_pretrained forwards unknown kwargs onto the
# config object; the sampling options below are stored as config attributes.
# Disabled options (do_sample, streamer callback, eos terminators) are kept
# commented out so they can be re-enabled easily.
generation_config = AutoConfig.from_pretrained(
    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    max_new_tokens=300,
    # do_sample=True,
    # stream = streamer,
    top_p=0.95,
    temperature=0.4,
    stream=True,
    # eos_token_id=terminators
)
|
| 67 |
# send additional parameters to model for generation
|
| 68 |
|
| 69 |
#model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
|
| 70 |
-
model = Llama(
|
| 71 |
model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
|
| 72 |
chat_format="llama-2",
|
| 73 |
n_gpu_layers = 0,
|
|
|
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
|
| 55 |
#initiate model and tokenizer
|
| 56 |
|
| 57 |
+
# The transformers-based generation config is retired in favour of passing
# parameters straight to llama-cpp-python; kept here for reference only.
# FIX: the closing ")" below was previously left uncommented after the rest
# of the call was commented out, which made app.py a SyntaxError.
#generation_config = AutoConfig.from_pretrained(
#    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
#    max_new_tokens= 300,
#    # do_sample=True,
#    # stream = streamer,
#    top_p=0.95,
#    temperature=0.4,
#    stream = True
#    # eos_token_id=terminators
#)
|
| 67 |
# send additional parameters to model for generation
|
| 68 |
|
| 69 |
#model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
|
| 70 |
+
model = Llama.from_pretrained(
|
| 71 |
model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
|
| 72 |
chat_format="llama-2",
|
| 73 |
n_gpu_layers = 0,
|