Spaces:
Build error
Build error
Ethan Chang
committed on
Commit
·
6a3dbe6
1
Parent(s):
0f566b9
Modified pathing for tinyllama, added in huggingface downloader for tinyllama
Browse files
code/modules/chat/chat_model_loader.py
CHANGED
|
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
|
|
| 5 |
import torch
|
| 6 |
import transformers
|
| 7 |
import os
|
|
|
|
|
|
|
| 8 |
from langchain.callbacks.manager import CallbackManager
|
| 9 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
| 10 |
|
|
@@ -14,6 +16,14 @@ class ChatModelLoader:
|
|
| 14 |
self.config = config
|
| 15 |
self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def load_chat_model(self):
|
| 18 |
if self.config["llm_params"]["llm_loader"] == "openai":
|
| 19 |
llm = ChatOpenAI(
|
|
@@ -21,7 +31,8 @@ class ChatModelLoader:
|
|
| 21 |
)
|
| 22 |
elif self.config["llm_params"]["llm_loader"] == "local_llm":
|
| 23 |
n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
|
| 24 |
-
model_path = self.config["llm_params"]["local_llm_params"]["model"]
|
|
|
|
| 25 |
llm = LlamaCpp(
|
| 26 |
model_path=model_path,
|
| 27 |
n_batch=n_batch,
|
|
|
|
| 5 |
import torch
|
| 6 |
import transformers
|
| 7 |
import os
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from huggingface_hub import hf_hub_download
|
| 10 |
from langchain.callbacks.manager import CallbackManager
|
| 11 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
| 12 |
|
|
|
|
| 16 |
self.config = config
|
| 17 |
self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 18 |
|
| 19 |
+
def _verify_model_cache(self, model_cache_path):
|
| 20 |
+
hf_hub_download(
|
| 21 |
+
repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
|
| 22 |
+
filename="tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
|
| 23 |
+
cache_dir=model_cache_path
|
| 24 |
+
)
|
| 25 |
+
return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
|
| 26 |
+
|
| 27 |
def load_chat_model(self):
|
| 28 |
if self.config["llm_params"]["llm_loader"] == "openai":
|
| 29 |
llm = ChatOpenAI(
|
|
|
|
| 31 |
)
|
| 32 |
elif self.config["llm_params"]["llm_loader"] == "local_llm":
|
| 33 |
n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
|
| 34 |
+
model_path = self._verify_model_cache(self.config["llm_params"]["local_llm_params"]["model"])
|
| 35 |
+
print(model_path)
|
| 36 |
llm = LlamaCpp(
|
| 37 |
model_path=model_path,
|
| 38 |
n_batch=n_batch,
|
code/modules/config/constants.py
CHANGED
|
@@ -78,5 +78,5 @@ Question: {question}
|
|
| 78 |
|
| 79 |
# Model Paths
|
| 80 |
|
| 81 |
-
LLAMA_PATH = "../storage/models/tinyllama
|
| 82 |
MISTRAL_PATH = "storage/models/mistral-7b-v0.1.Q4_K_M.gguf"
|
|
|
|
| 78 |
|
| 79 |
# Model Paths
|
| 80 |
|
| 81 |
+
LLAMA_PATH = "../storage/models/tinyllama"
|
| 82 |
MISTRAL_PATH = "storage/models/mistral-7b-v0.1.Q4_K_M.gguf"
|