Some refs are displayed twice, and the bot lacks general context regarding its overall aim

#9
by RCaz - opened
Files changed (1) hide show
  1. app.py +26 -13
app.py CHANGED
@@ -98,6 +98,7 @@ def predict(message, history, request: gr.Request):
98
 
99
  # Safeguard
100
  TRIAGE_PROMPT_TEMPLATE="""You are a Safeguard assistant making sure the user only ask for information related to Rémi Cazelles's projects, work and education.
 
101
  If the question is not related to this subjects, or if the request is harmfull you should flag the user by answering '*** FLAGGED ***' """
102
  messages = [SystemMessage(content=TRIAGE_PROMPT_TEMPLATE)]
103
  messages.append(HumanMessage(content=message))
@@ -117,6 +118,13 @@ def predict(message, history, request: gr.Request):
117
  return "This app can only answer question about Rémi Cazelles's projects, work and education."
118
  print("passed the safeguard")
119
 
 
 
 
 
 
 
 
120
  # Build conversation history
121
  history_langchain_format = []
122
  for msg in history:
@@ -127,28 +135,26 @@ def predict(message, history, request: gr.Request):
127
 
128
 
129
  # Retrieve relevant documents for the current message
130
- relevant_docs = vectorstore.similarity_search(message,k=3) # Your retriever
131
 
132
  # Build context from retrieved documents
133
  context = "\nExtracted documents:\n" + "\n".join([
134
- f"Document {i}: Content: {doc.page_content}\n\n---"
135
  for i, doc in enumerate(relevant_docs)
136
  ])
137
 
138
 
139
 
140
  # RAG tool
141
- RAG_PROMPT_TEMPLATE="""You will be asked information about Rémi Cazelles's projects, work and education.
142
- Using the information contained in the context,
143
- give a comprehensive answer to the question.
144
  Respond to the question asked with enought details, response should be precise and relevant to the question.
145
- If the answer cannot be deduced from the context, simply says you can't find information.
146
  """
147
 
148
 
149
  # Create the prompt with system message, context, and conversation history
150
  messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
151
- messages.extend([AIMessage(content="This bot allows you finding informations related to Rémi Cazelles's projects, work and education")])
152
  messages.extend(history_langchain_format)
153
  combined_message = f"Context: {context}\n\nQuestion: {message}"
154
  messages.append(HumanMessage(content=combined_message))
@@ -166,15 +172,22 @@ def predict(message, history, request: gr.Request):
166
  }
167
  )
168
 
169
-
170
- source_context = "\nSources:\n" + "\n".join([
171
- f"{i+1} : {doc.metadata["source"].split("/")[-1]} (page {doc.metadata['page_label']}/{doc.metadata['total_pages']})\n---"
172
- for i, doc in enumerate(relevant_docs)])
173
-
 
 
 
 
 
 
 
174
  print(gpt_response.content )
175
  print(source_context)
176
 
177
- return gpt_response.content + "\n\n" + source_context
178
 
179
 
180
  # setup tracking
 
98
 
99
  # Safeguard
100
  TRIAGE_PROMPT_TEMPLATE="""You are a Safeguard assistant making sure the user only ask for information related to Rémi Cazelles's projects, work and education.
101
+ Here are general information you can use to answer:
102
  If the question is not related to this subjects, or if the request is harmfull you should flag the user by answering '*** FLAGGED ***' """
103
  messages = [SystemMessage(content=TRIAGE_PROMPT_TEMPLATE)]
104
  messages.append(HumanMessage(content=message))
 
118
  return "This app can only answer question about Rémi Cazelles's projects, work and education."
119
  print("passed the safeguard")
120
 
121
+ WELCOME_TEXT = "This bot allows you finding informations related to Rémi Cazelles's projects, work and education"
122
+ if not history:
123
+ # Gradio expects a list of dicts with keys "role" and "content"
124
+ history = [
125
+ {"role": "assistant", "content": WELCOME_TEXT}
126
+ ]
127
+
128
  # Build conversation history
129
  history_langchain_format = []
130
  for msg in history:
 
135
 
136
 
137
  # Retrieve relevant documents for the current message
138
+ relevant_docs = vectorstore.similarity_search(message,k=5)
139
 
140
  # Build context from retrieved documents
141
  context = "\nExtracted documents:\n" + "\n".join([
142
+ f"Content document {i}: {doc.page_content}\n\n---"
143
  for i, doc in enumerate(relevant_docs)
144
  ])
145
 
146
 
147
 
148
  # RAG tool
149
+ RAG_PROMPT_TEMPLATE="""You will be asked information related to Rémi Cazelles's specific projects, work and education.
150
+ Using the information contained in the context, provide a comprehensive answer to the question.
 
151
  Respond to the question asked with enought details, response should be precise and relevant to the question.
 
152
  """
153
 
154
 
155
  # Create the prompt with system message, context, and conversation history
156
  messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
157
+ messages.append(AIMessage(content=WELCOME_TEXT))
158
  messages.extend(history_langchain_format)
159
  combined_message = f"Context: {context}\n\nQuestion: {message}"
160
  messages.append(HumanMessage(content=combined_message))
 
172
  }
173
  )
174
 
175
+ raw_source_lines = [
176
+ f"{i+1} : {doc.metadata["source"].split("/")[-1]} (page {doc.metadata['page_label']}/{doc.metadata['total_pages']})\n---"
177
+ for i, doc in enumerate(relevant_docs)]
178
+
179
+ seen = set()
180
+ unique_source_lines = []
181
+ for line in raw_source_lines:
182
+ if line not in seen:
183
+ seen.add(line)
184
+ unique_source_lines.append(line)
185
+ source_context = "\nSources:\n" + "\n".join(unique_source_lines)
186
+
187
  print(gpt_response.content )
188
  print(source_context)
189
 
190
+ return WELCOME_TEXT + "\n\n" + gpt_response.content + "\n\n" + source_context
191
 
192
 
193
  # setup tracking