Roger Condori committed: change default model and added limits for demo

conversadocs/bones.py CHANGED (+7 -4)
@@ -100,7 +100,7 @@ class DocChat(param.Parameterized):
         super(DocChat, self).__init__( **params)
         self.loaded_file = ["demo_docs/demo.txt"]
         self.db = load_db(self.loaded_file)
-        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.
+        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
         self.qa = q_a(self.db, "stuff", self.k_value, self.llm)
 
 
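The new default swaps in the 2-bit q2_K quantization of Llama-2-7B-Chat and pins conservative sampling limits (256 max tokens, temperature 0.2, top_p 0.95, top_k 50, repeat penalty 1.2, k=3 retrieved chunks) so the demo stays responsive on CPU. As a rough sketch of what loading such a file involves, assuming a llama-cpp-python backend (change_llm's body is not part of this diff, so the loader and context size below are assumptions):

from llama_cpp import Llama

# GGML-era llama-cpp-python loaded .ggmlv3 files like this one;
# current releases expect GGUF instead.
llm = Llama(model_path="llama-2-7b-chat.ggmlv3.q2_K.bin", n_ctx=2048)

out = llm(
    "Q: What does the demo document describe? A:",
    max_tokens=256,      # same limits as the new default call above
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    repeat_penalty=1.2,
)
print(out["choices"][0]["text"])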
@@ -133,7 +133,7 @@ class DocChat(param.Parameterized):
             result = self.qa({"question": query, "chat_history": self.chat_history})
         except:
             print("Error not get response from model, reloaded default llama-2 7B config")
-            self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.
+            self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
             self.qa = q_a(self.db, "stuff", k_max, self.llm)
             result = self.qa({"question": query, "chat_history": self.chat_history})
 
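This hunk keeps the existing recovery path and only updates the model it falls back to: if the active model raises while answering, the app reloads the known-good default and retries the query once. A minimal sketch of the pattern, with names taken from the diff (their bodies are not shown there) and except Exception: in place of the diff's bare except:, which would also swallow KeyboardInterrupt:

def answer_with_fallback(self, query):
    try:
        return self.qa({"question": query, "chat_history": self.chat_history})
    except Exception:
        # Reload the known-good default model, rebuild the chain, retry once.
        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML",
                        "llama-2-7b-chat.ggmlv3.q2_K.bin",
                        max_tokens=256, temperature=0.2, top_p=0.95,
                        top_k=50, repeat_penalty=1.2, k=3)
        self.qa = q_a(self.db, "stuff", k_max, self.llm)  # k_max: module-level constant in bones.py
        return self.qa({"question": query, "chat_history": self.chat_history})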
@@ -145,6 +145,9 @@ class DocChat(param.Parameterized):
 
     def summarize(self, chunk_size=2000, chunk_overlap=100):
         # load docs
+        if "SET_LIMIT" == os.getenv("DEMO"):
+            return "Since the space only uses the CPU, the summarization function cannot be used."
+
         documents = []
         for file in self.loaded_file:
             ext = "." + file.rsplit(".", 1)[-1]
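The added guard is the "limits for demo" part of the commit: when the Space's configuration sets the DEMO environment variable to SET_LIMIT, the CPU-heavy summarization path returns early with an explanation. The comparison is written operands-reversed but behaves the same as the usual form:

import os

os.environ["DEMO"] = "SET_LIMIT"       # how the demo Space would enable the limit
if os.getenv("DEMO") == "SET_LIMIT":   # equivalent to the diff's reversed comparison
    print("Summarization is disabled on this CPU-only demo.")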
@@ -196,7 +199,7 @@ class DocChat(param.Parameterized):
                 self.k_value = k
                 return f"Loaded {file_} [GPU INFERENCE]"
             except:
-                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.
+                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
                 return "No valid model | Reloaded Reloaded default llama-2 7B config"
         else:
             try:
@@ -222,7 +225,7 @@ class DocChat(param.Parameterized):
                 self.k_value = k
                 return f"Loaded {file_} [CPU INFERENCE SLOW]"
             except:
-                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.
+                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
                 return "No valid model | Reloaded default llama-2 7B config"
 
     def default_falcon_model(self, HF_TOKEN):
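For completeness, one way to fetch the new default weights locally is huggingface_hub's hf_hub_download; the diff does not show how the Space obtains the file, so treat this as an assumed workflow rather than the repo's actual download path:

from huggingface_hub import hf_hub_download

# Downloads (and caches) the q2_K weights referenced by the new default.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGML",
    filename="llama-2-7b-chat.ggmlv3.q2_K.bin",
)
print(model_path)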