from typing import Literal

from langchain.tools import tool
from langchain_chroma import Chroma
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Persistent storage setup
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
vector_db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

# Number of documents handed back to the model per query.
_TOP_K = 5
# Returned instead of an empty string so the calling model gets an explicit
# signal that the knowledge base had nothing relevant.
_NO_RESULTS_MESSAGE = "No matching documents were found in the internal knowledge base."


def _format_document(doc) -> str:
    """Render one retrieved document as a citation header plus its content."""
    # Not every ingested document carries 'source'/'page' metadata (e.g.
    # non-PDF loaders), so fall back to placeholders instead of raising
    # KeyError in the middle of a tool call.
    source = doc.metadata.get("source", "unknown")
    page = doc.metadata.get("page", "n/a")
    return f"SOURCE: {source} (Pg. {page})\nCONTENT: {doc.page_content}"


# NOTE: the docstring below is used by @tool as the tool description that is
# sent to the model, so it is kept short and model-facing. Developer notes:
#   query       -- free-text question to look up in the vector store.
#   search_type -- "similarity" (plain nearest-neighbour) or "mmr"
#                  (maximal marginal relevance, for more diverse hits).
#   returns     -- the formatted hits joined by "\n---\n", or a short
#                  "no results" message when nothing was retrieved.
@tool
def local_research_tool(
    query: str,
    search_type: Literal["similarity", "mmr"] = "similarity",
) -> str:
    """
    Searches the internal corporate knowledge base.
    Use 'similarity' for exact facts and 'mmr' for broad, diverse research.
    """
    search_kwargs = {"k": _TOP_K}
    if search_type == "mmr":
        # fetch_k / lambda_mult only mean something to MMR; passing them to a
        # plain similarity search forwards unknown keyword arguments to the
        # underlying store.
        search_kwargs.update(fetch_k=20, lambda_mult=0.5)

    retriever = vector_db.as_retriever(
        search_type=search_type,
        search_kwargs=search_kwargs,
    )
    docs = retriever.invoke(query)
    if not docs:
        return _NO_RESULTS_MESSAGE

    # Formatted for model synthesis with citations
    return "\n---\n".join(_format_document(doc) for doc in docs)


# Gemini-optimized web search fallback
# `num_results` is the result-count field on DuckDuckGoSearchResults; the
# previous `k=3` is not a declared field and so did not limit the results.
web_search_tool = DuckDuckGoSearchResults(
    num_results=3,
    description="Search the internet for real-time data, technical APIs (like PyTorch/LangChain), or news.",
)

tools = [local_research_tool, web_search_tool]