Some refs are displayed twice, and the bot lacks general context regarding its overall aim

#9
by RCaz - opened
Files changed (1) hide show
  1. app.py +26 -13
app.py CHANGED
@@ -98,6 +98,7 @@ def predict(message, history, request: gr.Request):
98
 
99
  # Safeguard
100
  TRIAGE_PROMPT_TEMPLATE="""You are a Safeguard assistant making sure the user only ask for information related to Rémi Cazelles's projects, work and education.
 
101
  If the question is not related to this subjects, or if the request is harmfull you should flag the user by answering '*** FLAGGED ***' """
102
  messages = [SystemMessage(content=TRIAGE_PROMPT_TEMPLATE)]
103
  messages.append(HumanMessage(content=message))
@@ -117,6 +118,13 @@ def predict(message, history, request: gr.Request):
117
  return "This app can only answer question about Rémi Cazelles's projects, work and education."
118
  print("passed the safeguard")
119
 
 
 
 
 
 
 
 
120
  # Build conversation history
121
  history_langchain_format = []
122
  for msg in history:
@@ -127,28 +135,26 @@ def predict(message, history, request: gr.Request):
127
 
128
 
129
  # Retrieve relevant documents for the current message
130
- relevant_docs = vectorstore.similarity_search(message,k=3) # Your retriever
131
 
132
  # Build context from retrieved documents
133
  context = "\nExtracted documents:\n" + "\n".join([
134
- f"Document {i}: Content: {doc.page_content}\n\n---"
135
  for i, doc in enumerate(relevant_docs)
136
  ])
137
 
138
 
139
 
140
  # RAG tool
141
- RAG_PROMPT_TEMPLATE="""You will be asked information about Rémi Cazelles's projects, work and education.
142
- Using the information contained in the context,
143
- give a comprehensive answer to the question.
144
  Respond to the question asked with enought details, response should be precise and relevant to the question.
145
- If the answer cannot be deduced from the context, simply says you can't find information.
146
  """
147
 
148
 
149
  # Create the prompt with system message, context, and conversation history
150
  messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
151
- messages.extend([AIMessage(content="This bot allows you finding informations related to Rémi Cazelles's projects, work and education")])
152
  messages.extend(history_langchain_format)
153
  combined_message = f"Context: {context}\n\nQuestion: {message}"
154
  messages.append(HumanMessage(content=combined_message))
@@ -166,15 +172,22 @@ def predict(message, history, request: gr.Request):
166
  }
167
  )
168
 
169
-
170
- source_context = "\nSources:\n" + "\n".join([
171
- f"{i+1} : {doc.metadata["source"].split("/")[-1]} (page {doc.metadata['page_label']}/{doc.metadata['total_pages']})\n---"
172
- for i, doc in enumerate(relevant_docs)])
173
-
 
 
 
 
 
 
 
174
  print(gpt_response.content )
175
  print(source_context)
176
 
177
- return gpt_response.content + "\n\n" + source_context
178
 
179
 
180
  # setup tracking
 
98
 
99
  # Safeguard
100
  TRIAGE_PROMPT_TEMPLATE="""You are a Safeguard assistant making sure the user only ask for information related to Rémi Cazelles's projects, work and education.
101
+ Here are general information you can use to answer:
102
  If the question is not related to this subjects, or if the request is harmfull you should flag the user by answering '*** FLAGGED ***' """
103
  messages = [SystemMessage(content=TRIAGE_PROMPT_TEMPLATE)]
104
  messages.append(HumanMessage(content=message))
 
118
  return "This app can only answer question about Rémi Cazelles's projects, work and education."
119
  print("passed the safeguard")
120
 
121
+ WELCOME_TEXT = "This bot allows you finding informations related to Rémi Cazelles's projects, work and education"
122
+ if not history:
123
+ # Gradio expects a list of dicts with keys "role" and "content"
124
+ history = [
125
+ {"role": "assistant", "content": WELCOME_TEXT}
126
+ ]
127
+
128
  # Build conversation history
129
  history_langchain_format = []
130
  for msg in history:
 
135
 
136
 
137
  # Retrieve relevant documents for the current message
138
+ relevant_docs = vectorstore.similarity_search(message,k=5)
139
 
140
  # Build context from retrieved documents
141
  context = "\nExtracted documents:\n" + "\n".join([
142
+ f"Content document {i}: {doc.page_content}\n\n---"
143
  for i, doc in enumerate(relevant_docs)
144
  ])
145
 
146
 
147
 
148
  # RAG tool
149
+ RAG_PROMPT_TEMPLATE="""You will be asked information related to Rémi Cazelles's specific projects, work and education.
150
+ Using the information contained in the context, provide a comprehensive answer to the question.
 
151
  Respond to the question asked with enought details, response should be precise and relevant to the question.
 
152
  """
153
 
154
 
155
  # Create the prompt with system message, context, and conversation history
156
  messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
157
+ messages.append(AIMessage(content=WELCOME_TEXT))
158
  messages.extend(history_langchain_format)
159
  combined_message = f"Context: {context}\n\nQuestion: {message}"
160
  messages.append(HumanMessage(content=combined_message))
 
172
  }
173
  )
174
 
175
+ raw_source_lines = [
176
+ f"{i+1} : {doc.metadata["source"].split("/")[-1]} (page {doc.metadata['page_label']}/{doc.metadata['total_pages']})\n---"
177
+ for i, doc in enumerate(relevant_docs)]
178
+
179
+ seen = set()
180
+ unique_source_lines = []
181
+ for line in raw_source_lines:
182
+ if line not in seen:
183
+ seen.add(line)
184
+ unique_source_lines.append(line)
185
+ source_context = "\nSources:\n" + "\n".join(unique_source_lines)
186
+
187
  print(gpt_response.content )
188
  print(source_context)
189
 
190
+ return WELCOME_TEXT + "\n\n" + gpt_response.content + "\n\n" + source_context
191
 
192
 
193
  # setup tracking