Added a condition for when Loki is active and removed debug prints
app.py
CHANGED
@@ -108,18 +108,8 @@ def llama_generation(input_text: str,
     # This makes a greedy generation when temperature is passed to 0 (selects the next token sequence generated by model regardless). Selects each token with the highest probability
     if temperature == 0:
         generate_kwargs["do_sample"] = False
-
-    #
-    # lock = threading.Lock()
-
-    # def generate_llama():
-    #     with lock:
-    #         # Generate the response using the llama_model
-    #         response = llama_model.generate(**generate_kwargs)
-    #         return response
-
-
-    # start the thread and wait for it to finish
+
+    # start the thread
     thread = threading.Thread(target=llama_model.generate, kwargs=generate_kwargs)
     thread.start()
     thread.join()
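For reference, llama_generation drives llama_model.generate from a worker thread. The sketch below shows the usual transformers threaded-streaming pattern that this code appears to follow; the model id, the stream_llama name, and the assumption that generate_kwargs carries a TextIteratorStreamer are illustrative only and are not taken from this repository.

```python
# Hedged sketch: background-thread generation with a TextIteratorStreamer.
import threading
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # placeholder, not from the diff
tokenizer = AutoTokenizer.from_pretrained(model_id)
llama_model = AutoModelForCausalLM.from_pretrained(model_id)

def stream_llama(prompt: str, temperature: float, max_new_tokens: int):
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=temperature > 0,  # temperature == 0 -> greedy decoding, as in the hunk above
    )
    if temperature > 0:
        generate_kwargs["temperature"] = temperature
    thread = threading.Thread(target=llama_model.generate, kwargs=generate_kwargs)
    thread.start()
    # Reading the streamer while the thread runs yields tokens incrementally;
    # llama_generation in app.py joins the thread first, so it returns only
    # after the full generation has finished.
    for text in streamer:
        yield text
    thread.join()
```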
@@ -161,6 +151,11 @@ def bot_comms(input_text: str,
         cuda_info = check_cuda()
         yield cuda_info
         return
+
+    if input_text == "switch to loki":
+        llm_mode = input_text
+        yield "Loki is on ποΈ"
+        return
 
     if input_text == "switch to llama":
         llm_mode = input_text
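The new "switch to loki" block follows the same guard shape as the other switch commands in bot_comms: store the mode, confirm to the user, and return before calling any model. A minimal sketch of that pattern, with the signature trimmed, placeholder confirmation strings, and llm_mode assumed to be module-level state (its declaration is outside this diff):

```python
# Hedged sketch of the mode-switch guards; llm_mode as a global is an assumption.
llm_mode = ""

def bot_comms(input_text: str):
    global llm_mode

    # Switch commands only change state and confirm; no model is called.
    if input_text == "switch to loki":
        llm_mode = input_text
        yield "Loki is on"  # placeholder confirmation text
        return

    if input_text == "switch to llama":
        llm_mode = input_text
        yield "Llama is on"  # placeholder confirmation text
        return

    # Any other input falls through to the generation branches.
```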
@@ -180,7 +175,6 @@ def bot_comms(input_text: str,
     if llm_mode == "switch to llama":
         streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
         outputs = []
-        print('llama responded to that.')
         for text in streamer:
             outputs.append(text)
             yield "".join(outputs)
@@ -188,7 +182,6 @@ def bot_comms(input_text: str,
     if llm_mode == "switch to gpt-4o":
         stream = gpt_generation(input=input_text, llama_output="", mode="gpt-4o")
         outputs = []
-        print("gpt-4o only about to answer.")
         for chunk in stream:
             if chunk.choices[0].delta.content is not None:
                 text = chunk.choices[0].delta.content
@@ -198,20 +191,18 @@ def bot_comms(input_text: str,
     if llm_mode == "switch to gpt-3.5-turbo":
         stream = gpt_generation(input=input_text, llama_output="", mode="gpt-3.5-turbo")
         outputs = []
-        print("gpt-3.5-turbo is about to answer.")
         for chunk in stream:
             if chunk.choices[0].delta.content is not None:
                 text = chunk.choices[0].delta.content
                 outputs.append(text)
                 yield "".join(outputs)
 
-    if llm_mode is None or llm_mode == "":
+    if llm_mode is None or llm_mode == "" or llm_mode == "switch to loki":
         streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
         output_text = output_list([text for text in streamer])
         stream = gpt_generation(input=input_text, llama_output=output_text, mode="gpt-4o")
 
         outputs = []
-        print("Loki is activated to answer")
         for chunk in stream:
             if chunk.choices[0].delta.content is not None:
                 text = chunk.choices[0].delta.content
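In the final branch, which now also covers "switch to loki", the Llama draft collected by output_list is passed to gpt_generation as llama_output and the GPT reply is streamed back chunk by chunk. gpt_generation itself is not shown in this diff; the sketch below is one plausible shape for it using the openai v1 client, chosen so that the chunk.choices[0].delta.content checks in the loops above match a real streaming response. The client setup, system prompt, and message layout are assumptions.

```python
# Hedged sketch of a gpt_generation helper; not the repository's implementation.
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def gpt_generation(input: str, llama_output: str, mode: str):
    """Return a streaming chat completion; `mode` is an OpenAI model name such as gpt-4o."""
    if llama_output:
        user_content = f"Question: {input}\n\nDraft answer from Llama: {llama_output}"
    else:
        user_content = input
    messages = [
        {"role": "system", "content": "Answer the user, refining any draft you are given."},
        {"role": "user", "content": user_content},
    ]
    # stream=True yields chunks whose incremental text lives in choices[0].delta.content,
    # which is exactly what bot_comms checks for None before appending.
    return client.chat.completions.create(model=mode, messages=messages, stream=True)
```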