File size: 3,155 Bytes
30ee88a
 
 
 
 
c92c6b2
30ee88a
c92c6b2
30ee88a
 
 
c92c6b2
30ee88a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fc97a4
 
30ee88a
0fc97a4
 
 
 
30ee88a
 
c92c6b2
 
 
 
0fc97a4
30ee88a
0fc97a4
 
 
30ee88a
0fc97a4
30ee88a
0fc97a4
30ee88a
0fc97a4
30ee88a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
from typing import List
from langchain_core.tools import tool
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_tavily import TavilySearch
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader 
from dotenv import load_dotenv

from config import configs

# Load variables from a local .env file into the process environment
# (presumably API keys for the tools below, e.g. Tavily — confirm in .env).
load_dotenv()

def initialize_chroma_vectorstore():
    """Initialize and return the Chroma vector store.

    Builds a HuggingFace dense-embedding function and opens the persisted
    Chroma collection, both configured via ``configs``.

    Returns:
        Chroma: Vector store bound to ``configs["PERSIST_PATH"]`` and
        ``configs["COLLECTION_NAME"]``.
    """
    # Embedding function is required so Chroma can embed queries at search time.
    dense_embeddings = HuggingFaceEmbeddings(
        model_name=configs["EMBEDDING_MODEL_NAME"]
    )

    vectorstore = Chroma(
        persist_directory=configs["PERSIST_PATH"],
        embedding_function=dense_embeddings,
        collection_name=configs["COLLECTION_NAME"]
    )
    return vectorstore


# Backward-compatible alias: the original public name had a typo
# ("intialize"); keep it so existing callers are not broken.
intialize_chroma_vectorstore = initialize_chroma_vectorstore

@tool
def web_search_tavily(query: str) -> dict:
    """Search Tavily for a query and return up to 3 results.

    Args:
        query: The search query.

    Returns:
        dict with key 'web_results'. On success the value is a list of
        result dicts with 'title', 'url', and 'content'; on any failure it
        is an error string (note: the value's type differs between the
        success and error cases).
    """
    try:
        search_docs = TavilySearch(
            max_results=3,
            topic="general",
            ).invoke({"query": query})["results"]
        # Keep only the fields the agent consumes; missing keys become "".
        results = [
            {
                "title": doc.get("title", ""),
                "url": doc.get("url", ""),
                "content": doc.get("content", ""),
            }
            for doc in search_docs
        ]
        return {"web_results": results}
    except Exception as e:
        # Best-effort tool: report the error as data instead of raising,
        # so the calling agent loop can continue.
        return {"web_results": f"Error retrieving results: {str(e)}"}
    
@tool
def wikipedia_search(query: str) -> dict:
    """Search Wikipedia for a query and return up to 3 results.

    Args:
        query: The search query.

    Returns:
        dict with key 'wiki_results': on success, a list of dicts with
        'title', 'url', and 'snippet'; on failure, an error string.
    """
    try:
        loaded_docs = WikipediaLoader(query=query, load_max_docs=3).load()
        wiki_entries = []
        for page in loaded_docs:
            meta = page.metadata
            # Flatten each document into the fields the agent consumes.
            wiki_entries.append(
                {
                    "title": meta.get("title", ""),
                    "url": meta.get("url", ""),
                    "snippet": page.page_content,
                }
            )
        return {"wiki_results": wiki_entries}
    except Exception as e:
        # Best-effort: surface the error as data rather than raising.
        return {"wiki_results": f"Error retrieving results: {str(e)}"}

@tool
def arxiv_search(query: str) -> dict:
    """Search Arxiv for a query and return up to 3 results.

    Args:
        query: The search query.

    Returns:
        dict with key 'arxiv_results': on success, a list of dicts with
        'title', 'url', and 'snippet'; on failure, an error string.
    """
    def _entry(doc):
        # Flatten a loaded document into the fields the agent consumes.
        return {
            "title": doc.metadata.get("title", ""),
            "url": doc.metadata.get("url", ""),
            "snippet": doc.page_content,
        }

    try:
        loaded_docs = ArxivLoader(query=query, load_max_docs=3).load()
        return {"arxiv_results": [_entry(doc) for doc in loaded_docs]}
    except Exception as e:
        # Best-effort: surface the error as data rather than raising.
        return {"arxiv_results": f"Error retrieving results: {str(e)}"}
    
    
if __name__ == "__main__":
    # No standalone behavior: this module only defines tools for the agent.
    pass