ChristopherMarais committed
Commit 6b642d0 · verified · 1 Parent(s): 362f58e

Update app.py

Files changed (1)
  1. app.py +36 -41
app.py CHANGED
@@ -116,59 +116,52 @@ def update_chat(message, history):
     history.append({"role": "user", "content": message})
     return history, message, ""
 
-# def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_chain_state_dict):
-#     """
-#     Generate the assistant's response using the QA chain (if available) or fallback to plain chat.
-#     The pre-prompt is always included by concatenating it to the user's new question.
-#     """
-#     qa_chain = qa_chain_state_dict.get("qa_chain")
-
-#     if qa_chain is not None:
-#         # Format history to the plain-text format expected by the QA chain
-#         formatted_history = format_chat_history(history)
-#         # Prepend the pre-prompt to the current question
-#         combined_question = PRE_PROMPT + "\n" + message
-#         response = qa_chain.invoke({"question": combined_question, "chat_history": formatted_history})
-#         answer = response.get("answer", "")
-#         history.append({"role": "assistant", "content": answer})
-#         return history, {"qa_chain": qa_chain}
-
-#     # Fallback: Plain Chat Mode using the InferenceClient (pre-prompt already included here)
-#     messages = [{"role": "system", "content": PRE_PROMPT}] + history
-#     response = ""
-#     result = client.chat_completion(
-#         messages,
-#         max_tokens=max_tokens,
-#         stream=False,
-#         temperature=temperature,
-#         top_p=top_p,
-#     )
-#     for token_message in result:
-#         token = token_message.choices[0].delta.content
-#         response += token
-
-#     history.append({"role": "assistant", "content": response})
-#     return history, {"qa_chain": qa_chain}
-
 def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_chain_state_dict):
     qa_chain = qa_chain_state_dict.get("qa_chain")
 
     if qa_chain is not None:
-        # Format history to the plain-text format expected by the QA chain
+        # Format chat history to the plain-text format expected by the QA chain.
         formatted_history = format_chat_history(history)
-        # Prepend the pre-prompt to the current question
-        combined_question = PRE_PROMPT + "\n" + message
+
+        # Update the pre-prompt to encourage speculative responses.
+        speculative_pre_prompt = PRE_PROMPT + "\nIf you're not completely sure, please provide your best guess and mention that it is speculative."
+        combined_question = speculative_pre_prompt + "\n" + message
+
+        # Try retrieving an answer via the QA chain.
         response = qa_chain.invoke({"question": combined_question, "chat_history": formatted_history})
         answer = response.get("answer", "").strip()
 
-        # Check if the answer is empty and apply a fallback response if needed.
+        # If no answer is returned, try the fallback plain chat mode with adjusted parameters.
         if not answer:
-            answer = "I'm sorry, I couldn't retrieve a clear answer. Feel free t ocontact consider Christopher directly: https://gcmarais.com/contact/"
+            # Increase temperature and optionally max_tokens for fallback.
+            increased_temperature = min(temperature + 0.2, 1.0)  # Cap temperature at 1.0
+            increased_max_tokens = max_tokens + 128  # Increase max tokens for a longer response if needed
+
+            speculative_prompt = speculative_pre_prompt + "\n" + message
+            messages = [{"role": "system", "content": speculative_prompt}] + history
+            response = ""
+            result = client.chat_completion(
+                messages,
+                max_tokens=increased_max_tokens,
+                stream=False,
+                temperature=increased_temperature,
+                top_p=top_p,
+            )
+            for token_message in result:
+                token = token_message.choices[0].delta.content
+                response += token
+            answer = response.strip()
+
+            # Final fallback if still empty.
+            if not answer:
+                answer = ("I'm sorry, I couldn't retrieve a clear answer. "
+                          "However, based on the available context, here is my best guess: "
+                          "[speculative answer].")
 
         history.append({"role": "assistant", "content": answer})
         return history, {"qa_chain": qa_chain}
 
-    # Fallback: Plain Chat Mode using the InferenceClient
+    # Fallback: Plain Chat Mode using the InferenceClient when no QA chain is available.
     messages = [{"role": "system", "content": PRE_PROMPT}] + history
     response = ""
     result = client.chat_completion(
@@ -184,12 +177,14 @@ def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_
 
     response = response.strip()
     if not response:
-        response = "I'm sorry, I couldn't generate a response. Please try asking in a different way. Alterantively, consider contacting Christopher directly: https://gcmarais.com/contact/"
+        response = ("I'm sorry, I couldn't generate a response. Please try asking in a different way. "
+                    "Alternatively, consider contacting Christopher directly: https://gcmarais.com/contact/")
 
     history.append({"role": "assistant", "content": response})
    return history, {"qa_chain": qa_chain}
 
 
+
 # Global InferenceClient for plain chat (fallback)
 client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
 
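
The diff calls a format_chat_history helper that is defined elsewhere in app.py and not shown in this commit. As a rough sketch only (an assumption, not the app's actual implementation), a helper matching the original comment's "plain-text format" and the Gradio-style message dicts used above could look like this:

def format_chat_history(history):
    # Hypothetical helper (not part of this commit): flatten the list of
    # {"role": ..., "content": ...} messages into a plain-text transcript
    # for the QA chain's "chat_history" input.
    lines = []
    for msg in history:
        speaker = "User" if msg.get("role") == "user" else "Assistant"
        lines.append(f"{speaker}: {msg.get('content', '')}")
    return "\n".join(lines)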
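
For reference, a minimal sketch (not from this commit) of the two ways huggingface_hub's InferenceClient.chat_completion returns text: with stream=False the complete reply is read from choices[0].message.content, while iterating delta chunks, as the fallback loop above does, corresponds to stream=True. The model ID and sampling values mirror the app but are otherwise illustrative.

from huggingface_hub import InferenceClient

client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
messages = [{"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"}]

# Non-streaming: one response object containing the complete reply.
result = client.chat_completion(messages, max_tokens=256, temperature=0.7, top_p=0.95, stream=False)
full_text = result.choices[0].message.content

# Streaming: chunks whose delta.content fragments are concatenated.
streamed_text = ""
for chunk in client.chat_completion(messages, max_tokens=256, temperature=0.7, top_p=0.95, stream=True):
    token = chunk.choices[0].delta.content
    if token:
        streamed_text += token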