Spaces:
Running
Running
jedick committed on
Commit ·
1ec08a1
1
Parent(s): 5a1e3f1
Fix up examples
Browse files
- app.py +3 -3
- prompts.py +1 -0
- retriever.py +5 -3
app.py
CHANGED
|
@@ -354,7 +354,7 @@ with gr.Blocks(
|
|
| 354 |
**Features:** RAG, today's date, hybrid search (semantic + lexical), multiple retrievals, citations output, chat memory<br>
|
| 355 |
**Tech:** [OpenAI](https://openai.com/), [Chroma](https://www.trychroma.com/),
|
| 356 |
[BM25S](https://github.com/xhluca/bm25s), [LangGraph](https://www.langchain.com/langgraph), [Gradio](https://www.langchain.com/langgraph)<br>
|
| 357 |
-
**Maintainer:** [Jeffrey Dick](
|
| 358 |
🏠 **More info:** [R-help-chat GitHub repository](https://github.com/jedick/R-help-chat)
|
| 359 |
"""
|
| 360 |
return info_text
|
|
@@ -364,7 +364,7 @@ with gr.Blocks(
|
|
| 364 |
questions = [
|
| 365 |
# "What is today's date?",
|
| 366 |
"Show me code examples using plotmath",
|
| 367 |
-
"
|
| 368 |
]
|
| 369 |
|
| 370 |
# cf. https://github.com/gradio-app/gradio/pull/8745 for updating examples
|
|
@@ -374,7 +374,7 @@ with gr.Blocks(
|
|
| 374 |
"""Get multi-tool example questions"""
|
| 375 |
questions = [
|
| 376 |
"Differences between lapply and for loops",
|
| 377 |
-
"
|
| 378 |
]
|
| 379 |
|
| 380 |
return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
|
|
|
|
| 354 |
**Features:** RAG, today's date, hybrid search (semantic + lexical), multiple retrievals, citations output, chat memory<br>
|
| 355 |
**Tech:** [OpenAI](https://openai.com/), [Chroma](https://www.trychroma.com/),
|
| 356 |
[BM25S](https://github.com/xhluca/bm25s), [LangGraph](https://www.langchain.com/langgraph), [Gradio](https://www.langchain.com/langgraph)<br>
|
| 357 |
+
**Maintainer:** [Jeffrey Dick](https://jedick.github.io) - feedback welcome!<br>
|
| 358 |
🏠 **More info:** [R-help-chat GitHub repository](https://github.com/jedick/R-help-chat)
|
| 359 |
"""
|
| 360 |
return info_text
|
|
|
|
| 364 |
questions = [
|
| 365 |
# "What is today's date?",
|
| 366 |
"Show me code examples using plotmath",
|
| 367 |
+
"Discuss pipe operator usage before 2025",
|
| 368 |
]
|
| 369 |
|
| 370 |
# cf. https://github.com/gradio-app/gradio/pull/8745 for updating examples
|
|
|
|
| 374 |
"""Get multi-tool example questions"""
|
| 375 |
questions = [
|
| 376 |
"Differences between lapply and for loops",
|
| 377 |
+
"Summarize emails from the most recent two months",
|
| 378 |
]
|
| 379 |
|
| 380 |
return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
|
prompts.py
CHANGED
|
@@ -39,6 +39,7 @@ def query_prompt(db_dir, collection):
|
|
| 39 |
# gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
|
| 40 |
f"The emails available for retrieval are from {start} to {end}. "
|
| 41 |
"For questions about differences, changes, or comparisons between X and Y, retrieve emails about X and Y using separate tool calls. "
|
|
|
|
| 42 |
"Always use retrieve_emails with a non-empty query string for search_query. "
|
| 43 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
| 44 |
"For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=). "
|
|
|
|
| 39 |
# gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
|
| 40 |
f"The emails available for retrieval are from {start} to {end}. "
|
| 41 |
"For questions about differences, changes, or comparisons between X and Y, retrieve emails about X and Y using separate tool calls. "
|
| 42 |
+
"Also use multiple tool calls for multiple months or years but not long year ranges (> 5 years). "
|
| 43 |
"Always use retrieve_emails with a non-empty query string for search_query. "
|
| 44 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
| 45 |
"For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=). "
|
retriever.py
CHANGED
|
@@ -48,9 +48,11 @@ def BuildRetriever(
|
|
| 48 |
db_dir=db_dir, collection=collection, top_k=top_k
|
| 49 |
)
|
| 50 |
else:
|
| 51 |
-
# Get
|
|
|
|
|
|
|
| 52 |
base_retriever = BuildRetrieverDense(
|
| 53 |
-
db_dir=db_dir, collection=collection, top_k=
|
| 54 |
)
|
| 55 |
return TopKRetriever(
|
| 56 |
base_retriever=base_retriever,
|
|
@@ -66,7 +68,7 @@ def BuildRetriever(
|
|
| 66 |
)
|
| 67 |
else:
|
| 68 |
base_retriever = BuildRetrieverSparse(
|
| 69 |
-
db_dir=db_dir, collection=collection, top_k=
|
| 70 |
)
|
| 71 |
return TopKRetriever(
|
| 72 |
base_retriever=base_retriever,
|
|
|
|
| 48 |
db_dir=db_dir, collection=collection, top_k=top_k
|
| 49 |
)
|
| 50 |
else:
|
| 51 |
+
# Get 20000 documents then keep top_k filtered by year and month
|
| 52 |
+
# If this is increased to 100000 we get: chromadb.errors.InternalError: Error executing plan:
|
| 53 |
+
# Internal error: error returned from database: (code: 1) too many SQL variables
|
| 54 |
base_retriever = BuildRetrieverDense(
|
| 55 |
+
db_dir=db_dir, collection=collection, top_k=20000
|
| 56 |
)
|
| 57 |
return TopKRetriever(
|
| 58 |
base_retriever=base_retriever,
|
|
|
|
| 68 |
)
|
| 69 |
else:
|
| 70 |
base_retriever = BuildRetrieverSparse(
|
| 71 |
+
db_dir=db_dir, collection=collection, top_k=20000
|
| 72 |
)
|
| 73 |
return TopKRetriever(
|
| 74 |
base_retriever=base_retriever,
|