Roger Condori committed: change default model and added limits for demo

conversadocs/bones.py CHANGED (+7 -4)
@@ -100,7 +100,7 @@ class DocChat(param.Parameterized):
         super(DocChat, self).__init__( **params)
         self.loaded_file = ["demo_docs/demo.txt"]
         self.db = load_db(self.loaded_file)
-        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.
+        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
         self.qa = q_a(self.db, "stuff", self.k_value, self.llm)
 
 
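The new default swaps in the 2-bit q2_K quantization of Llama-2-7B-Chat and pins conservative sampling limits (256 max tokens, temperature 0.2, top_p 0.95, top_k 50, repeat penalty 1.2, k=3 retrieved chunks) so the demo stays responsive on CPU. As a rough sketch of what loading such a file involves, assuming a llama-cpp-python backend (change_llm's body is not part of this diff, so the loader and context size below are assumptions):

from llama_cpp import Llama

# GGML-era llama-cpp-python loaded .ggmlv3 files like this one;
# current releases expect GGUF instead.
llm = Llama(model_path="llama-2-7b-chat.ggmlv3.q2_K.bin", n_ctx=2048)

out = llm(
    "Q: What does the demo document describe? A:",
    max_tokens=256,      # same limits as the new default call above
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    repeat_penalty=1.2,
)
print(out["choices"][0]["text"])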
@@ -133,7 +133,7 @@ class DocChat(param.Parameterized):
             result = self.qa({"question": query, "chat_history": self.chat_history})
         except:
             print("Error not get response from model, reloaded default llama-2 7B config")
-            self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.
+            self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
             self.qa = q_a(self.db, "stuff", k_max, self.llm)
             result = self.qa({"question": query, "chat_history": self.chat_history})
 
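This hunk keeps the existing recovery path and only updates the model it falls back to: if the active model raises while answering, the app reloads the known-good default and retries the query once. A minimal sketch of the pattern, with names taken from the diff (their bodies are not shown there) and except Exception: in place of the diff's bare except:, which would also swallow KeyboardInterrupt:

def answer_with_fallback(self, query):
    try:
        return self.qa({"question": query, "chat_history": self.chat_history})
    except Exception:
        # Reload the known-good default model, rebuild the chain, retry once.
        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML",
                        "llama-2-7b-chat.ggmlv3.q2_K.bin",
                        max_tokens=256, temperature=0.2, top_p=0.95,
                        top_k=50, repeat_penalty=1.2, k=3)
        self.qa = q_a(self.db, "stuff", k_max, self.llm)  # k_max: module-level constant in bones.py
        return self.qa({"question": query, "chat_history": self.chat_history})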
@@ -145,6 +145,9 @@ class DocChat(param.Parameterized):
 
     def summarize(self, chunk_size=2000, chunk_overlap=100):
         # load docs
+        if "SET_LIMIT" == os.getenv("DEMO"):
+            return "Since the space only uses the CPU, the summarization function cannot be used."
+
         documents = []
         for file in self.loaded_file:
             ext = "." + file.rsplit(".", 1)[-1]
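The added guard is the "limits for demo" part of the commit: when the Space's configuration sets the DEMO environment variable to SET_LIMIT, the CPU-heavy summarization path returns early with an explanation. The comparison is written operands-reversed but behaves the same as the usual form:

import os

os.environ["DEMO"] = "SET_LIMIT"       # how the demo Space would enable the limit
if os.getenv("DEMO") == "SET_LIMIT":   # equivalent to the diff's reversed comparison
    print("Summarization is disabled on this CPU-only demo.")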
@@ -196,7 +199,7 @@ class DocChat(param.Parameterized):
                 self.k_value = k
                 return f"Loaded {file_} [GPU INFERENCE]"
             except:
-                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.
+                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
                 return "No valid model | Reloaded Reloaded default llama-2 7B config"
         else:
             try:
@@ -222,7 +225,7 @@ class DocChat(param.Parameterized):
                 self.k_value = k
                 return f"Loaded {file_} [CPU INFERENCE SLOW]"
             except:
-                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.
+                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
                 return "No valid model | Reloaded default llama-2 7B config"
 
     def default_falcon_model(self, HF_TOKEN):
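For completeness, one way to fetch the new default weights locally is huggingface_hub's hf_hub_download; the diff does not show how the Space obtains the file, so treat this as an assumed workflow rather than the repo's actual download path:

from huggingface_hub import hf_hub_download

# Downloads (and caches) the q2_K weights referenced by the new default.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGML",
    filename="llama-2-7b-chat.ggmlv3.q2_K.bin",
)
print(model_path)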