Spaces:

jedick
/

R-help-chat

Running

jedick committed on Jan 4

Commit

1ec08a1

1 Parent(s): 5a1e3f1

Fix up examples

Files changed (3) hide show

app.py CHANGED Viewed

@@ -354,7 +354,7 @@ with gr.Blocks(
             **Features:** RAG, today's date, hybrid search (semantic + lexical), multiple retrievals, citations output, chat memory<br>
             **Tech:** [OpenAI](https://openai.com/), [Chroma](https://www.trychroma.com/),
               [BM25S](https://github.com/xhluca/bm25s), [LangGraph](https://www.langchain.com/langgraph), [Gradio](https://www.langchain.com/langgraph)<br>
-            **Maintainer:** [Jeffrey Dick](mailto:j3ffdick@gmail.com) - feedback welcome!<br>
             🏠 **More info:** [R-help-chat GitHub repository](https://github.com/jedick/R-help-chat)
             """
         return info_text
@@ -364,7 +364,7 @@ with gr.Blocks(
         questions = [
             # "What is today's date?",
             "Show me code examples using plotmath",
-            "Summarize emails from the most recent two months",
         ]
         # cf. https://github.com/gradio-app/gradio/pull/8745 for updating examples
@@ -374,7 +374,7 @@ with gr.Blocks(
         """Get multi-tool example questions"""
         questions = [
             "Differences between lapply and for loops",
-            "Discuss pipe operator usage in 2022, 2023, and 2024",
         ]
         return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions

             **Features:** RAG, today's date, hybrid search (semantic + lexical), multiple retrievals, citations output, chat memory<br>
             **Tech:** [OpenAI](https://openai.com/), [Chroma](https://www.trychroma.com/),
               [BM25S](https://github.com/xhluca/bm25s), [LangGraph](https://www.langchain.com/langgraph), [Gradio](https://www.langchain.com/langgraph)<br>
+            **Maintainer:** [Jeffrey Dick](https://jedick.github.io) - feedback welcome!<br>
             🏠 **More info:** [R-help-chat GitHub repository](https://github.com/jedick/R-help-chat)
             """
         return info_text
         questions = [
             # "What is today's date?",
             "Show me code examples using plotmath",
+            "Discuss pipe operator usage before 2025",
         ]
         # cf. https://github.com/gradio-app/gradio/pull/8745 for updating examples
         """Get multi-tool example questions"""
         questions = [
             "Differences between lapply and for loops",
+            "Summarize emails from the most recent two months",
         ]
         return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions

prompts.py CHANGED Viewed

@@ -39,6 +39,7 @@ def query_prompt(db_dir, collection):
         # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
         f"The emails available for retrieval are from {start} to {end}. "
         "For questions about differences, changes, or comparisons between X and Y, retrieve emails about X and Y using separate tool calls. "
         "Always use retrieve_emails with a non-empty query string for search_query. "
         "For general summaries, use retrieve_emails(search_query='R'). "
         "For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=). "

         # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
         f"The emails available for retrieval are from {start} to {end}. "
         "For questions about differences, changes, or comparisons between X and Y, retrieve emails about X and Y using separate tool calls. "
+        "Also use multiple tool calls for multiple months or years but not long year ranges (> 5 years). "
         "Always use retrieve_emails with a non-empty query string for search_query. "
         "For general summaries, use retrieve_emails(search_query='R'). "
         "For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=). "

retriever.py CHANGED Viewed

@@ -48,9 +48,11 @@ def BuildRetriever(
                 db_dir=db_dir, collection=collection, top_k=top_k
             )
         else:
-            # Get 10000 documents then keep top_k filtered by year and month
             base_retriever = BuildRetrieverDense(
-                db_dir=db_dir, collection=collection, top_k=10000
             )
             return TopKRetriever(
                 base_retriever=base_retriever,
@@ -66,7 +68,7 @@ def BuildRetriever(
             )
         else:
             base_retriever = BuildRetrieverSparse(
-                db_dir=db_dir, collection=collection, top_k=10000
             )
             return TopKRetriever(
                 base_retriever=base_retriever,

                 db_dir=db_dir, collection=collection, top_k=top_k
             )
         else:
+            # Get 20000 documents then keep top_k filtered by year and month
+            # If this is increased to 100000 we get: chromadb.errors.InternalError: Error executing plan:
+            #   Internal error: error returned from database: (code: 1) too many SQL variables
             base_retriever = BuildRetrieverDense(
+                db_dir=db_dir, collection=collection, top_k=20000
             )
             return TopKRetriever(
                 base_retriever=base_retriever,
             )
         else:
             base_retriever = BuildRetrieverSparse(
+                db_dir=db_dir, collection=collection, top_k=20000
             )
             return TopKRetriever(
                 base_retriever=base_retriever,