jedick committed on
Commit
1ec08a1
·
1 Parent(s): 5a1e3f1

Fix up examples

Browse files
Files changed (3) hide show
  1. app.py +3 -3
  2. prompts.py +1 -0
  3. retriever.py +5 -3
app.py CHANGED
@@ -354,7 +354,7 @@ with gr.Blocks(
354
  **Features:** RAG, today's date, hybrid search (semantic + lexical), multiple retrievals, citations output, chat memory<br>
355
  **Tech:** [OpenAI](https://openai.com/), [Chroma](https://www.trychroma.com/),
356
  [BM25S](https://github.com/xhluca/bm25s), [LangGraph](https://www.langchain.com/langgraph), [Gradio](https://www.gradio.app/)<br>
357
- **Maintainer:** [Jeffrey Dick](mailto:j3ffdick@gmail.com) - feedback welcome!<br>
358
  🏠 **More info:** [R-help-chat GitHub repository](https://github.com/jedick/R-help-chat)
359
  """
360
  return info_text
@@ -364,7 +364,7 @@ with gr.Blocks(
364
  questions = [
365
  # "What is today's date?",
366
  "Show me code examples using plotmath",
367
- "Summarize emails from the most recent two months",
368
  ]
369
 
370
  # cf. https://github.com/gradio-app/gradio/pull/8745 for updating examples
@@ -374,7 +374,7 @@ with gr.Blocks(
374
  """Get multi-tool example questions"""
375
  questions = [
376
  "Differences between lapply and for loops",
377
- "Discuss pipe operator usage in 2022, 2023, and 2024",
378
  ]
379
 
380
  return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
 
354
  **Features:** RAG, today's date, hybrid search (semantic + lexical), multiple retrievals, citations output, chat memory<br>
355
  **Tech:** [OpenAI](https://openai.com/), [Chroma](https://www.trychroma.com/),
356
  [BM25S](https://github.com/xhluca/bm25s), [LangGraph](https://www.langchain.com/langgraph), [Gradio](https://www.gradio.app/)<br>
357
+ **Maintainer:** [Jeffrey Dick](https://jedick.github.io) - feedback welcome!<br>
358
  🏠 **More info:** [R-help-chat GitHub repository](https://github.com/jedick/R-help-chat)
359
  """
360
  return info_text
 
364
  questions = [
365
  # "What is today's date?",
366
  "Show me code examples using plotmath",
367
+ "Discuss pipe operator usage before 2025",
368
  ]
369
 
370
  # cf. https://github.com/gradio-app/gradio/pull/8745 for updating examples
 
374
  """Get multi-tool example questions"""
375
  questions = [
376
  "Differences between lapply and for loops",
377
+ "Summarize emails from the most recent two months",
378
  ]
379
 
380
  return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
prompts.py CHANGED
@@ -39,6 +39,7 @@ def query_prompt(db_dir, collection):
39
  # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
40
  f"The emails available for retrieval are from {start} to {end}. "
41
  "For questions about differences, changes, or comparisons between X and Y, retrieve emails about X and Y using separate tool calls. "
 
42
  "Always use retrieve_emails with a non-empty query string for search_query. "
43
  "For general summaries, use retrieve_emails(search_query='R'). "
44
  "For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=). "
 
39
  # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
40
  f"The emails available for retrieval are from {start} to {end}. "
41
  "For questions about differences, changes, or comparisons between X and Y, retrieve emails about X and Y using separate tool calls. "
42
+ "Also use multiple tool calls for multiple months or years but not long year ranges (> 5 years). "
43
  "Always use retrieve_emails with a non-empty query string for search_query. "
44
  "For general summaries, use retrieve_emails(search_query='R'). "
45
  "For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=). "
retriever.py CHANGED
@@ -48,9 +48,11 @@ def BuildRetriever(
48
  db_dir=db_dir, collection=collection, top_k=top_k
49
  )
50
  else:
51
- # Get 10000 documents then keep top_k filtered by year and month
 
 
52
  base_retriever = BuildRetrieverDense(
53
- db_dir=db_dir, collection=collection, top_k=10000
54
  )
55
  return TopKRetriever(
56
  base_retriever=base_retriever,
@@ -66,7 +68,7 @@ def BuildRetriever(
66
  )
67
  else:
68
  base_retriever = BuildRetrieverSparse(
69
- db_dir=db_dir, collection=collection, top_k=10000
70
  )
71
  return TopKRetriever(
72
  base_retriever=base_retriever,
 
48
  db_dir=db_dir, collection=collection, top_k=top_k
49
  )
50
  else:
51
+ # Get 20000 documents then keep top_k filtered by year and month
52
+ # If this is increased to 100000 we get: chromadb.errors.InternalError: Error executing plan:
53
+ # Internal error: error returned from database: (code: 1) too many SQL variables
54
  base_retriever = BuildRetrieverDense(
55
+ db_dir=db_dir, collection=collection, top_k=20000
56
  )
57
  return TopKRetriever(
58
  base_retriever=base_retriever,
 
68
  )
69
  else:
70
  base_retriever = BuildRetrieverSparse(
71
+ db_dir=db_dir, collection=collection, top_k=20000
72
  )
73
  return TopKRetriever(
74
  base_retriever=base_retriever,