Added a condition for when Loki is active and removed debug prints
app.py
CHANGED
@@ -108,18 +108,8 @@ def llama_generation(input_text: str,
     # This makes a greedy generation when temperature is passed to 0 (selects the next token sequence generated by model regardless). Selects each token with the highest probability
     if temperature == 0:
         generate_kwargs["do_sample"] = False
-
-    #
-    # lock = threading.Lock()
-
-    # def generate_llama():
-    #     with lock:
-    #         # Generate the response using the llama_model
-    #         response = llama_model.generate(**generate_kwargs)
-    #         return response
-
-
-    # start the thread and wait for it to finish
+
+    # start the thread
     thread = threading.Thread(target=llama_model.generate, kwargs=generate_kwargs)
     thread.start()
     thread.join()
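For reference, llama_generation drives llama_model.generate from a worker thread. The sketch below shows the usual transformers threaded-streaming pattern that this code appears to follow; the model id, the stream_llama name, and the assumption that generate_kwargs carries a TextIteratorStreamer are illustrative only and are not taken from this repository.

```python
# Hedged sketch: background-thread generation with a TextIteratorStreamer.
import threading
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # placeholder, not from the diff
tokenizer = AutoTokenizer.from_pretrained(model_id)
llama_model = AutoModelForCausalLM.from_pretrained(model_id)

def stream_llama(prompt: str, temperature: float, max_new_tokens: int):
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=temperature > 0,  # temperature == 0 -> greedy decoding, as in the hunk above
    )
    if temperature > 0:
        generate_kwargs["temperature"] = temperature
    thread = threading.Thread(target=llama_model.generate, kwargs=generate_kwargs)
    thread.start()
    # Reading the streamer while the thread runs yields tokens incrementally;
    # llama_generation in app.py joins the thread first, so it returns only
    # after the full generation has finished.
    for text in streamer:
        yield text
    thread.join()
```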
@@ -161,6 +151,11 @@ def bot_comms(input_text: str,
         cuda_info = check_cuda()
         yield cuda_info
         return
+
+    if input_text == "switch to loki":
+        llm_mode = input_text
+        yield "Loki is on ποΈ"
+        return
 
     if input_text == "switch to llama":
         llm_mode = input_text
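The new "switch to loki" block follows the same guard shape as the other switch commands in bot_comms: store the mode, confirm to the user, and return before calling any model. A minimal sketch of that pattern, with the signature trimmed, placeholder confirmation strings, and llm_mode assumed to be module-level state (its declaration is outside this diff):

```python
# Hedged sketch of the mode-switch guards; llm_mode as a global is an assumption.
llm_mode = ""

def bot_comms(input_text: str):
    global llm_mode

    # Switch commands only change state and confirm; no model is called.
    if input_text == "switch to loki":
        llm_mode = input_text
        yield "Loki is on"  # placeholder confirmation text
        return

    if input_text == "switch to llama":
        llm_mode = input_text
        yield "Llama is on"  # placeholder confirmation text
        return

    # Any other input falls through to the generation branches.
```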
@@ -180,7 +175,6 @@ def bot_comms(input_text: str,
     if llm_mode == "switch to llama":
         streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
         outputs = []
-        print('llama responded to that.')
         for text in streamer:
             outputs.append(text)
             yield "".join(outputs)
@@ -188,7 +182,6 @@ def bot_comms(input_text: str,
     if llm_mode == "switch to gpt-4o":
         stream = gpt_generation(input=input_text, llama_output="", mode="gpt-4o")
         outputs = []
-        print("gpt-4o only about to answer.")
         for chunk in stream:
             if chunk.choices[0].delta.content is not None:
                 text = chunk.choices[0].delta.content
@@ -198,20 +191,18 @@ def bot_comms(input_text: str,
     if llm_mode == "switch to gpt-3.5-turbo":
         stream = gpt_generation(input=input_text, llama_output="", mode="gpt-3.5-turbo")
         outputs = []
-        print("gpt-3.5-turbo is about to answer.")
         for chunk in stream:
             if chunk.choices[0].delta.content is not None:
                 text = chunk.choices[0].delta.content
                 outputs.append(text)
                 yield "".join(outputs)
 
-    if llm_mode is None or llm_mode == "":
+    if llm_mode is None or llm_mode == "" or llm_mode == "switch to loki":
         streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
         output_text = output_list([text for text in streamer])
         stream = gpt_generation(input=input_text, llama_output=output_text, mode="gpt-4o")
 
         outputs = []
-        print("Loki is activated to answer")
         for chunk in stream:
             if chunk.choices[0].delta.content is not None:
                 text = chunk.choices[0].delta.content
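In the final branch, which now also covers "switch to loki", the Llama draft collected by output_list is passed to gpt_generation as llama_output and the GPT reply is streamed back chunk by chunk. gpt_generation itself is not shown in this diff; the sketch below is one plausible shape for it using the openai v1 client, chosen so that the chunk.choices[0].delta.content checks in the loops above match a real streaming response. The client setup, system prompt, and message layout are assumptions.

```python
# Hedged sketch of a gpt_generation helper; not the repository's implementation.
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def gpt_generation(input: str, llama_output: str, mode: str):
    """Return a streaming chat completion; `mode` is an OpenAI model name such as gpt-4o."""
    if llama_output:
        user_content = f"Question: {input}\n\nDraft answer from Llama: {llama_output}"
    else:
        user_content = input
    messages = [
        {"role": "system", "content": "Answer the user, refining any draft you are given."},
        {"role": "user", "content": user_content},
    ]
    # stream=True yields chunks whose incremental text lives in choices[0].delta.content,
    # which is exactly what bot_comms checks for None before appending.
    return client.chat.completions.create(model=mode, messages=messages, stream=True)
```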