File size: 1,358 Bytes
9806c71
 
 
5dce38e
9806c71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from typing import Literal
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.tools import tool

# Embedding model plus the on-disk Chroma store queried by local_research_tool.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
)
vector_db = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db",
)

@tool
def local_research_tool(
    query: str,
    search_type: Literal["similarity", "mmr"] = "similarity",
) -> str:
    """Searches the internal corporate knowledge base.

    Use 'similarity' for exact facts and 'mmr' for broad, diverse research.

    Args:
        query: Natural-language question or keywords to look up.
        search_type: Retrieval strategy, either 'similarity' or 'mmr'.

    Returns:
        The retrieved passages, each prefixed with its source citation and
        separated by '---' lines, or a short notice when nothing matched.
    """
    # NOTE: @tool exposes this docstring to the model as the tool description,
    # so keep its wording aimed at the model as well as at human readers.

    # fetch_k / lambda_mult only apply to MMR. Passing them to a plain
    # similarity search forwards them as unexpected keyword arguments to the
    # underlying vector-store query, so include them for 'mmr' only.
    search_kwargs = {"k": 5}
    if search_type == "mmr":
        search_kwargs.update(fetch_k=20, lambda_mult=0.5)

    retriever = vector_db.as_retriever(
        search_type=search_type,
        search_kwargs=search_kwargs,
    )
    docs = retriever.invoke(query)
    if not docs:
        # An explicit message is more useful to the calling model than "".
        return "No matching documents found in the knowledge base."

    # Formatted for model synthesis with citations. Metadata keys are looked
    # up defensively: not every ingested document carries 'source' or 'page'.
    formatted = []
    for doc in docs:
        source = doc.metadata.get("source", "unknown")
        page = doc.metadata.get("page")
        if page is None:
            citation = f"SOURCE: {source}"
        else:
            citation = f"SOURCE: {source} (Pg. {page})"
        formatted.append(f"{citation}\nCONTENT: {doc.page_content}")
    return "\n---\n".join(formatted)

# Web search fallback for anything the local knowledge base cannot answer.
# The result cap is passed as `num_results`; `k` is not a field of
# DuckDuckGoSearchResults, so a `k=3` argument would not limit the results.
web_search_tool = DuckDuckGoSearchResults(
    num_results=3,
    description="Search the internet for real-time data, technical APIs (like PyTorch/LangChain), or news."
)

# Tool list handed to the agent: local knowledge base first, web search second.
tools = [local_research_tool, web_search_tool]