# Hugging Face Space status: Sleeping
# File size: 1,358 Bytes
# Commits: 9806c71 5dce38e 9806c71
from typing import Literal
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.tools import tool
# Persistent storage setup.
# One embeddings client is created at import time and handed to the Chroma
# store, which is configured to persist under ./chroma_db.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
)
vector_db = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db",
)
@tool
def local_research_tool(query: str, search_type: Literal["similarity", "mmr"] = "similarity") -> str:
    """
    Searches the internal corporate knowledge base.
    Use 'similarity' for exact facts and 'mmr' for broad, diverse research.
    """
    # NOTE: the docstring above is kept verbatim on purpose -- @tool uses it as
    # the tool description, so maintainer notes live in comments instead.
    #
    # query:       free-text question to look up in the vector store.
    # search_type: retrieval strategy handed to the retriever.
    # returns:     the retrieved chunks joined by "\n---\n", each prefixed with
    #              its source and page, or a short notice when nothing matched.

    search_kwargs = {"k": 5}
    if search_type == "mmr":
        # fetch_k / lambda_mult only tune MMR re-ranking. They are not sent for
        # plain similarity search, where they have no meaning and may be
        # rejected when forwarded to the underlying collection query.
        search_kwargs.update(fetch_k=20, lambda_mult=0.5)

    retriever = vector_db.as_retriever(
        search_type=search_type,
        search_kwargs=search_kwargs,
    )
    docs = retriever.invoke(query)

    if not docs:
        # An explicit message is easier for the calling model to act on than
        # an empty string.
        return "No matching documents found in the internal knowledge base."

    # Formatted for model synthesis with citations. Metadata keys are read
    # defensively: chunks ingested without 'source' or 'page' should not make
    # the whole lookup fail with a KeyError.
    formatted = []
    for d in docs:
        meta = d.metadata or {}
        source = meta.get("source", "unknown")
        page = meta.get("page", "n/a")
        formatted.append(f"SOURCE: {source} (Pg. {page})\nCONTENT: {d.page_content}")
    return "\n---\n".join(formatted)
# Web search fallback for questions the local knowledge base cannot answer.
# The result cap is passed as `num_results`: `k` does not appear to be a
# declared field of DuckDuckGoSearchResults, so it was not acting as a limit.
# NOTE(review): confirm the field name against the installed
# langchain_community version.
web_search_tool = DuckDuckGoSearchResults(
    num_results=3,
    description="Search the internet for real-time data, technical APIs (like PyTorch/LangChain), or news.",
)

# Tools exported by this module: the local knowledge-base lookup first, then
# the web search.
tools = [local_research_tool, web_search_tool]