Spaces:
Running
Running
| from langchain_core.documents import Document | |
| from langchain_core.tools import tool | |
| from ask_candid.services.knowledge_base import ( | |
| SourceNames, | |
| generate_queries, | |
| run_search, | |
| reranker, | |
| process_hit | |
| ) | |
def search_candid_knowledge_base(
    query: str,
    sources: list[SourceNames],
    news_days_ago: int = 60
) -> tuple[str, list[Document]]:
    """Search Candid's subject matter expert knowledge base to find answers about the social and philanthropic sector.
    This knowledge includes help articles and video training sessions from Candid's subject matter experts, blog posts
    about the sector from Candid staff and trusted partner authors, research documents about the sector and news
    articles curated about activity happening in the sector around the world.

    Searches are performed through a combination of vector and keyword searching. Results are then re-ranked against
    the original query to get the best results.

    Search results often come back with specific organizations named, especially if referencing the news. In these cases
    the organizations should be identified in Candid's data and links to their profiles **MUST** be included in final
    chat response to the user.

    Parameters
    ----------
    query : str
        Text describing a user's question or a description of investigative work which requires support from Candid's
        knowledge base
    sources : list[SourceNames]
        One or more sources of knowledge from different areas at Candid.
        * Candid Blog: Blog posts from Candid staff and trusted partners intended to help those in the sector or
            illuminate ongoing work
        * Candid Help: Candid FAQs to help users get started with Candid's product platform and learning resources
        * Candid Learning: Training documents from Candid's subject matter experts
        * Candid News: News articles and press releases about real-time activity in the philanthropic sector
        * IssueLab Research Reports: Academic research reports about the social/philanthropic sector
        * YouTube Training: Transcripts from video-based training seminars from Candid's subject matter experts
    news_days_ago : int, optional
        How many days in the past to search for news articles, if a user is asking for recent trends then this value
        should be set lower >~ 10, by default 60

    Returns
    -------
    tuple[str, list[Document]]
        A pair of ``(text, documents)``:
        * text: the ``page_content`` of every processed, re-ranked hit joined by blank lines (an empty string when
            there are no hits)
        * documents: the processed hits themselves, in the same order as they appear in the text
    """

    vector_queries, quasi_vector_queries = generate_queries(
        query=query,
        sources=sources,
        news_days_ago=news_days_ago
    )
    results = run_search(vector_searches=vector_queries, non_vector_searches=quasi_vector_queries)

    # Process each re-ranked hit exactly once; the text is derived from the same list that is returned so the two
    # halves of the result can never drift out of order.
    response_sources = [process_hit(hit) for hit in reranker(results, search_text=query)]
    text_response = '\n\n'.join(doc.page_content for doc in response_sources)
    return text_response, response_sources