import json
from typing import List

from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.tools import tool
from langchain_tavily import TavilySearch

from config import configs

load_dotenv()


def initialize_chroma_vectorstore():
    """Initialize and return the Chroma vector store.

    Builds a HuggingFace dense-embedding function and opens (or creates)
    the persisted Chroma collection, using the model name, persistence
    path, and collection name declared in ``configs``.

    Returns:
        Chroma: a vector store backed by the configured dense embeddings.
    """
    dense_embeddings = HuggingFaceEmbeddings(
        model_name=configs["EMBEDDING_MODEL_NAME"]
    )
    vectorstore = Chroma(
        persist_directory=configs["PERSIST_PATH"],
        embedding_function=dense_embeddings,
        collection_name=configs["COLLECTION_NAME"],
    )
    return vectorstore


# Backward-compatible alias: the function was originally exported under a
# misspelled name ("intialize"); keep the old name so existing callers work.
intialize_chroma_vectorstore = initialize_chroma_vectorstore


@tool
def web_search_tavily(query: str) -> dict:
    """Search Tavily for a query and return up to 3 results.

    Args:
        query: The search query.

    Returns:
        dict with key 'web_results', containing a list of search results
        with 'title', 'url', and 'content' (or an error string on failure).
    """
    try:
        search_docs = TavilySearch(
            max_results=3,
            topic="general",
        ).invoke({"query": query})["results"]
        results = [
            {
                "title": doc.get("title", ""),
                "url": doc.get("url", ""),
                "content": doc.get("content", ""),
            }
            for doc in search_docs
        ]
        return {"web_results": results}
    except Exception as e:
        # Best-effort tool: report the failure as a string payload instead of
        # raising, so the calling agent loop can continue.
        return {"web_results": f"Error retrieving results: {str(e)}"}


@tool
def wikipedia_search(query: str) -> dict:
    """Search Wikipedia for a query and return up to 3 results.

    Args:
        query: The search query.

    Returns:
        dict with key 'wiki_results', containing a list of search results
        with 'title', 'url', and 'snippet' (or an error string on failure).
    """
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=3).load()
        results = [
            {
                "title": doc.metadata.get("title", ""),
                # WikipediaLoader exposes the page URL under the "source"
                # metadata key; the original looked up "url", which is not
                # set and always produced "".
                "url": doc.metadata.get("source", ""),
                "snippet": doc.page_content,
            }
            for doc in search_docs
        ]
        return {"wiki_results": results}
    except Exception as e:
        # Mirror web_search_tavily: degrade to an error string, never raise.
        return {"wiki_results": f"Error retrieving results: {str(e)}"}


@tool
def arxiv_search(query: str) -> dict:
    """Search Arxiv for a query and return up to 3 results.

    Args:
        query: The search query.

    Returns:
        dict with key 'arxiv_results', containing a list of search results
        with 'title', 'url', and 'snippet' (or an error string on failure).
    """
    try:
        search_docs = ArxivLoader(query=query, load_max_docs=3).load()
        results = [
            {
                # ArxivLoader capitalizes its metadata keys ("Title",
                # "Published", ...), so the original lowercase "title" lookup
                # always yielded "". Fall back to lowercase in case the
                # installed loader version differs.
                "title": doc.metadata.get(
                    "Title", doc.metadata.get("title", "")
                ),
                # NOTE(review): the default ArxivLoader metadata carries no
                # URL field ("entry_id" requires load_all_available_meta=True)
                # — confirm against the installed langchain_community version.
                "url": doc.metadata.get(
                    "entry_id", doc.metadata.get("url", "")
                ),
                "snippet": doc.page_content,
            }
            for doc in search_docs
        ]
        return {"arxiv_results": results}
    except Exception as e:
        # Mirror the other tools: degrade to an error string, never raise.
        return {"arxiv_results": f"Error retrieving results: {str(e)}"}


if __name__ == "__main__":
    pass