Spaces:
Sleeping
Sleeping
| from typing import Literal | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
| from langchain_chroma import Chroma | |
| from langchain_community.tools import DuckDuckGoSearchResults | |
| from langchain.tools import tool | |
# Persistent storage setup.
# Both objects are created at import time and shared by the tools in this module.
_EMBEDDING_MODEL = "models/text-embedding-004"
# Relative path: resolved against the process's current working directory.
_CHROMA_PERSIST_DIR = "./chroma_db"

# Embedding client handed to the vector store as its embedding function.
embeddings = GoogleGenerativeAIEmbeddings(
    model=_EMBEDDING_MODEL,
)
# Chroma vector store configured with the on-disk persist directory above.
vector_db = Chroma(
    embedding_function=embeddings,
    persist_directory=_CHROMA_PERSIST_DIR,
)
def local_research_tool(query: str, search_type: Literal["similarity", "mmr"] = "similarity"):
    """
    Searches the internal corporate knowledge base.
    Use 'similarity' for exact facts and 'mmr' for broad, diverse research.
    """
    # NOTE: the docstring above is left verbatim on purpose -- tool frameworks
    # typically surface it to the model as the tool description.
    #
    # Parameters:
    #   query       -- free-text query run against the module-level `vector_db`.
    #   search_type -- retrieval strategy handed to `as_retriever`.
    # Returns:
    #   One "SOURCE: ... CONTENT: ..." entry per retrieved document, joined by
    #   "\n---\n"; an empty string when the retriever returns no documents.

    # Always ask for the top 5 documents.
    search_kwargs = {"k": 5}
    if search_type == "mmr":
        # fetch_k (candidate pool size) and lambda_mult (relevance/diversity
        # trade-off) are MMR-only options. They are not sent for plain
        # similarity search, where the vector store may forward unknown
        # keyword arguments to the underlying query and reject them.
        search_kwargs["fetch_k"] = 20
        search_kwargs["lambda_mult"] = 0.5

    retriever = vector_db.as_retriever(
        search_type=search_type,
        search_kwargs=search_kwargs,
    )
    docs = retriever.invoke(query)

    # Formatted for model synthesis with citations.
    # Metadata is read with .get() so a document ingested without 'source' or
    # 'page' yields a placeholder instead of raising KeyError and failing the
    # whole tool call.
    formatted = [
        f"SOURCE: {d.metadata.get('source', 'unknown')} "
        f"(Pg. {d.metadata.get('page', 'n/a')})\nCONTENT: {d.page_content}"
        for d in docs
    ]
    return "\n---\n".join(formatted)
# Gemini-optimized web search fallback
# The result limit is set via `num_results`; the previous `k=3` is not a field
# of DuckDuckGoSearchResults, so the intended cap of three results was not
# being applied.
web_search_tool = DuckDuckGoSearchResults(
    num_results=3,
    description="Search the internet for real-time data, technical APIs (like PyTorch/LangChain), or news.",
)

# Tools exposed to the agent: local knowledge base first, then web search.
# NOTE(review): local_research_tool is registered as a plain function (the
# imported `tool` decorator is not applied to it in this view) -- confirm the
# consumer of `tools` accepts bare callables.
tools = [local_research_tool, web_search_tool]