sandz7 committed on
Commit
e7f4aa1
·
verified ·
1 Parent(s): d4c71d7

added condition for loki being active and removed prints

Browse files
Files changed (1) hide show
  1. app.py +8 -17
app.py CHANGED
@@ -108,18 +108,8 @@ def llama_generation(input_text: str,
108
  # This makes a greedy generation when temperature is passed to 0 (selects the next token sequence generated by model regardless). Selects each token with the highest probability
109
  if temperature == 0:
110
  generate_kwargs["do_sample"] = False
111
-
112
- # # Use a lock object to synchronize access to the llama_model
113
- # lock = threading.Lock()
114
-
115
- # def generate_llama():
116
- # with lock:
117
- # # Generate the response using the llama_model
118
- # response = llama_model.generate(**generate_kwargs)
119
- # return response
120
-
121
-
122
- # start the thread and wait for it to finish
123
  thread = threading.Thread(target=llama_model.generate, kwargs=generate_kwargs)
124
  thread.start()
125
  thread.join()
@@ -161,6 +151,11 @@ def bot_comms(input_text: str,
161
  cuda_info = check_cuda()
162
  yield cuda_info
163
  return
 
 
 
 
 
164
 
165
  if input_text == "switch to llama":
166
  llm_mode = input_text
@@ -180,7 +175,6 @@ def bot_comms(input_text: str,
180
  if llm_mode == "switch to llama":
181
  streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
182
  outputs = []
183
- print('llama responded to that.')
184
  for text in streamer:
185
  outputs.append(text)
186
  yield "".join(outputs)
@@ -188,7 +182,6 @@ def bot_comms(input_text: str,
188
  if llm_mode == "switch to gpt-4o":
189
  stream = gpt_generation(input=input_text, llama_output="", mode="gpt-4o")
190
  outputs = []
191
- print("gpt-4o only about to answer.")
192
  for chunk in stream:
193
  if chunk.choices[0].delta.content is not None:
194
  text = chunk.choices[0].delta.content
@@ -198,20 +191,18 @@ def bot_comms(input_text: str,
198
  if llm_mode == "switch to gpt-3.5-turbo":
199
  stream = gpt_generation(input=input_text, llama_output="", mode="gpt-3.5-turbo")
200
  outputs = []
201
- print("gpt-3.5-turbo is about to answer.")
202
  for chunk in stream:
203
  if chunk.choices[0].delta.content is not None:
204
  text = chunk.choices[0].delta.content
205
  outputs.append(text)
206
  yield "".join(outputs)
207
 
208
- if llm_mode is None or llm_mode == "":
209
  streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
210
  output_text = output_list([text for text in streamer])
211
  stream = gpt_generation(input=input_text, llama_output=output_text, mode="gpt-4o")
212
 
213
  outputs = []
214
- print("Loki is activated to answer")
215
  for chunk in stream:
216
  if chunk.choices[0].delta.content is not None:
217
  text = chunk.choices[0].delta.content
 
108
  # This makes a greedy generation when temperature is passed to 0 (selects the next token sequence generated by model regardless). Selects each token with the highest probability
109
  if temperature == 0:
110
  generate_kwargs["do_sample"] = False
111
+
112
+ # start the thread
 
 
 
 
 
 
 
 
 
 
113
  thread = threading.Thread(target=llama_model.generate, kwargs=generate_kwargs)
114
  thread.start()
115
  thread.join()
 
151
  cuda_info = check_cuda()
152
  yield cuda_info
153
  return
154
+
155
+ if input_text == "switch to loki":
156
+ llm_mode = input_text
157
+ yield "Loki is on 👁️"
158
+ return
159
 
160
  if input_text == "switch to llama":
161
  llm_mode = input_text
 
175
  if llm_mode == "switch to llama":
176
  streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
177
  outputs = []
 
178
  for text in streamer:
179
  outputs.append(text)
180
  yield "".join(outputs)
 
182
  if llm_mode == "switch to gpt-4o":
183
  stream = gpt_generation(input=input_text, llama_output="", mode="gpt-4o")
184
  outputs = []
 
185
  for chunk in stream:
186
  if chunk.choices[0].delta.content is not None:
187
  text = chunk.choices[0].delta.content
 
191
  if llm_mode == "switch to gpt-3.5-turbo":
192
  stream = gpt_generation(input=input_text, llama_output="", mode="gpt-3.5-turbo")
193
  outputs = []
 
194
  for chunk in stream:
195
  if chunk.choices[0].delta.content is not None:
196
  text = chunk.choices[0].delta.content
197
  outputs.append(text)
198
  yield "".join(outputs)
199
 
200
+ if llm_mode is None or llm_mode == "" or llm_mode == "switch to loki":
201
  streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
202
  output_text = output_list([text for text in streamer])
203
  stream = gpt_generation(input=input_text, llama_output=output_text, mode="gpt-4o")
204
 
205
  outputs = []
 
206
  for chunk in stream:
207
  if chunk.choices[0].delta.content is not None:
208
  text = chunk.choices[0].delta.content