Spaces:
Build error
Build error
Some refs are displayed twice; the bot lacks general context regarding its overall aim
#9
by
RCaz
- opened
app.py
CHANGED
|
@@ -98,6 +98,7 @@ def predict(message, history, request: gr.Request):
|
|
| 98 |
|
| 99 |
# Safeguard
|
| 100 |
TRIAGE_PROMPT_TEMPLATE="""You are a Safeguard assistant making sure the user only ask for information related to Rémi Cazelles's projects, work and education.
|
|
|
|
| 101 |
If the question is not related to this subjects, or if the request is harmfull you should flag the user by answering '*** FLAGGED ***' """
|
| 102 |
messages = [SystemMessage(content=TRIAGE_PROMPT_TEMPLATE)]
|
| 103 |
messages.append(HumanMessage(content=message))
|
|
@@ -117,6 +118,13 @@ def predict(message, history, request: gr.Request):
|
|
| 117 |
return "This app can only answer question about Rémi Cazelles's projects, work and education."
|
| 118 |
print("passed the safeguard")
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
# Build conversation history
|
| 121 |
history_langchain_format = []
|
| 122 |
for msg in history:
|
|
@@ -127,28 +135,26 @@ def predict(message, history, request: gr.Request):
|
|
| 127 |
|
| 128 |
|
| 129 |
# Retrieve relevant documents for the current message
|
| 130 |
-
relevant_docs = vectorstore.similarity_search(message,k=
|
| 131 |
|
| 132 |
# Build context from retrieved documents
|
| 133 |
context = "\nExtracted documents:\n" + "\n".join([
|
| 134 |
-
f"
|
| 135 |
for i, doc in enumerate(relevant_docs)
|
| 136 |
])
|
| 137 |
|
| 138 |
|
| 139 |
|
| 140 |
# RAG tool
|
| 141 |
-
RAG_PROMPT_TEMPLATE="""You will be asked information
|
| 142 |
-
Using the information contained in the context,
|
| 143 |
-
give a comprehensive answer to the question.
|
| 144 |
Respond to the question asked with enought details, response should be precise and relevant to the question.
|
| 145 |
-
If the answer cannot be deduced from the context, simply says you can't find information.
|
| 146 |
"""
|
| 147 |
|
| 148 |
|
| 149 |
# Create the prompt with system message, context, and conversation history
|
| 150 |
messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
|
| 151 |
-
messages.
|
| 152 |
messages.extend(history_langchain_format)
|
| 153 |
combined_message = f"Context: {context}\n\nQuestion: {message}"
|
| 154 |
messages.append(HumanMessage(content=combined_message))
|
|
@@ -166,15 +172,22 @@ def predict(message, history, request: gr.Request):
|
|
| 166 |
}
|
| 167 |
)
|
| 168 |
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
print(gpt_response.content )
|
| 175 |
print(source_context)
|
| 176 |
|
| 177 |
-
return gpt_response.content + "\n\n" + source_context
|
| 178 |
|
| 179 |
|
| 180 |
# setup tracking
|
|
|
|
| 98 |
|
| 99 |
# Safeguard
|
| 100 |
TRIAGE_PROMPT_TEMPLATE="""You are a Safeguard assistant making sure the user only ask for information related to Rémi Cazelles's projects, work and education.
|
| 101 |
+
Here are general information you can use to answer:
|
| 102 |
If the question is not related to this subjects, or if the request is harmfull you should flag the user by answering '*** FLAGGED ***' """
|
| 103 |
messages = [SystemMessage(content=TRIAGE_PROMPT_TEMPLATE)]
|
| 104 |
messages.append(HumanMessage(content=message))
|
|
|
|
| 118 |
return "This app can only answer question about Rémi Cazelles's projects, work and education."
|
| 119 |
print("passed the safeguard")
|
| 120 |
|
| 121 |
+
WELCOME_TEXT = "This bot allows you finding informations related to Rémi Cazelles's projects, work and education"
|
| 122 |
+
if not history:
|
| 123 |
+
# Gradio expects a list of dicts with keys "role" and "content"
|
| 124 |
+
history = [
|
| 125 |
+
{"role": "assistant", "content": WELCOME_TEXT}
|
| 126 |
+
]
|
| 127 |
+
|
| 128 |
# Build conversation history
|
| 129 |
history_langchain_format = []
|
| 130 |
for msg in history:
|
|
|
|
| 135 |
|
| 136 |
|
| 137 |
# Retrieve relevant documents for the current message
|
| 138 |
+
relevant_docs = vectorstore.similarity_search(message,k=5)
|
| 139 |
|
| 140 |
# Build context from retrieved documents
|
| 141 |
context = "\nExtracted documents:\n" + "\n".join([
|
| 142 |
+
f"Content document {i}: {doc.page_content}\n\n---"
|
| 143 |
for i, doc in enumerate(relevant_docs)
|
| 144 |
])
|
| 145 |
|
| 146 |
|
| 147 |
|
| 148 |
# RAG tool
|
| 149 |
+
RAG_PROMPT_TEMPLATE="""You will be asked information related to Rémi Cazelles's specific projects, work and education.
|
| 150 |
+
Using the information contained in the context, provide a comprehensive answer to the question.
|
|
|
|
| 151 |
Respond to the question asked with enought details, response should be precise and relevant to the question.
|
|
|
|
| 152 |
"""
|
| 153 |
|
| 154 |
|
| 155 |
# Create the prompt with system message, context, and conversation history
|
| 156 |
messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
|
| 157 |
+
messages.append(AIMessage(content=WELCOME_TEXT))
|
| 158 |
messages.extend(history_langchain_format)
|
| 159 |
combined_message = f"Context: {context}\n\nQuestion: {message}"
|
| 160 |
messages.append(HumanMessage(content=combined_message))
|
|
|
|
| 172 |
}
|
| 173 |
)
|
| 174 |
|
| 175 |
+
raw_source_lines = [
|
| 176 |
+
f"{i+1} : {doc.metadata["source"].split("/")[-1]} (page {doc.metadata['page_label']}/{doc.metadata['total_pages']})\n---"
|
| 177 |
+
for i, doc in enumerate(relevant_docs)]
|
| 178 |
+
|
| 179 |
+
seen = set()
|
| 180 |
+
unique_source_lines = []
|
| 181 |
+
for line in raw_source_lines:
|
| 182 |
+
if line not in seen:
|
| 183 |
+
seen.add(line)
|
| 184 |
+
unique_source_lines.append(line)
|
| 185 |
+
source_context = "\nSources:\n" + "\n".join(unique_source_lines)
|
| 186 |
+
|
| 187 |
print(gpt_response.content )
|
| 188 |
print(source_context)
|
| 189 |
|
| 190 |
+
return WELCOME_TEXT + "\n\n" + gpt_response.content + "\n\n" + source_context
|
| 191 |
|
| 192 |
|
| 193 |
# setup tracking
|