Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ import time
|
|
| 8 |
import sys
|
| 9 |
|
| 10 |
os.system("pip install --upgrade pip")
|
| 11 |
-
os.system('''CMAKE_ARGS="-DLLAMA_AVX512=ON" pip install llama-cpp-python''')
|
| 12 |
|
| 13 |
from huggingface_hub import snapshot_download
|
| 14 |
from llama_cpp import Llama
|
|
@@ -46,7 +46,7 @@ def get_system_tokens(model):
|
|
| 46 |
|
| 47 |
|
| 48 |
repo_name = "TheBloke/CausalLM-14B-GGUF"
|
| 49 |
-
model_name = "causallm_14b.
|
| 50 |
snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
|
| 51 |
|
| 52 |
model = Llama(
|
|
|
|
| 8 |
import sys
|
| 9 |
|
| 10 |
os.system("pip install --upgrade pip")
|
| 11 |
+
os.system('''CMAKE_ARGS="-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON -DLLAMA_FP16_VA=ON" pip install llama-cpp-python''')
|
| 12 |
|
| 13 |
from huggingface_hub import snapshot_download
|
| 14 |
from llama_cpp import Llama
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
repo_name = "TheBloke/CausalLM-14B-GGUF"
|
| 49 |
+
model_name = "causallm_14b.Q4_0.gguf"
|
| 50 |
snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
|
| 51 |
|
| 52 |
model = Llama(
|