junaid17 committed on
Commit
7afac3f
·
verified ·
1 Parent(s): 1ee432f

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +32 -47
tools.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from langchain_core.tools import tool
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
  from langchain_community.vectorstores import FAISS
@@ -17,73 +18,56 @@ load_dotenv()
17
  VECTORSTORE_DIR = "data/vectorstore"
18
  os.makedirs(VECTORSTORE_DIR, exist_ok=True)
19
 
20
- retriever = None
21
 
22
-
23
- def load_retriever():
24
- """Load FAISS retriever from disk if available."""
25
- global retriever
26
-
27
- try:
28
- embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
29
- index_path = os.path.join(VECTORSTORE_DIR, "index.faiss")
30
-
31
- if os.path.exists(index_path):
32
- vectorstore = FAISS.load_local(
33
- VECTORSTORE_DIR,
34
- embeddings,
35
- allow_dangerous_deserialization=True,
36
- )
37
- retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
38
- print("✅ Retriever loaded successfully")
39
- else:
40
- print("⚠️ No vectorstore found yet")
41
-
42
- except Exception as e:
43
- print("❌ Retriever load error:", e)
44
-
45
-
46
- # Load on startup
47
- load_retriever()
48
-
49
-
50
- def build_vectorstore(path: str):
51
- loader = PyPDFLoader(path)
52
- docs = loader.load()
53
 
54
  splitter = RecursiveCharacterTextSplitter(
55
  chunk_size=500,
56
  chunk_overlap=100
57
  )
58
 
59
- chunks = splitter.split_documents(docs)
60
- embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
61
 
 
62
  vectorstore = FAISS.from_documents(chunks, embeddings)
63
- vectorstore.save_local(VECTORSTORE_DIR)
64
 
 
65
  return vectorstore
66
 
67
 
68
- def update_retriever(path: str):
69
- global retriever
70
- retriever = build_vectorstore(path).as_retriever(search_kwargs={"k": 4})
71
 
72
 
73
  # ==============================
74
- # RAG TOOL
75
  # ==============================
76
  def create_rag_tool():
77
 
78
  @tool
79
  def rag_search(query: str) -> str:
80
- """Retrieve relevant context from uploaded documents."""
 
 
81
 
82
- global retriever
 
83
 
84
- if retriever is None:
85
- return "No document uploaded yet."
 
 
 
 
 
86
 
 
87
  docs = retriever.invoke(query)
88
 
89
  if not docs:
@@ -94,9 +78,9 @@ def create_rag_tool():
94
  return rag_search
95
 
96
 
97
- # -----------------------------
98
- # External tools (safe)
99
- # -----------------------------
100
 
101
  @tool
102
  def wikipedia_search(query: str) -> dict:
@@ -122,4 +106,5 @@ def tavily_search(query: str) -> dict:
122
  try:
123
  return {"results": TavilySearchResults(max_results=5).run(query)}
124
  except Exception as e:
125
- return {"error": str(e)}
 
 
1
+ ```python
2
  from langchain_core.tools import tool
3
  from langchain_text_splitters import RecursiveCharacterTextSplitter
4
  from langchain_community.vectorstores import FAISS
 
18
  VECTORSTORE_DIR = "data/vectorstore"
19
  os.makedirs(VECTORSTORE_DIR, exist_ok=True)
20
 
 
21
 
22
+ # ==============================
23
+ # VECTOR STORE CREATION
24
+ # ==============================
25
+ def build_vectorstore(file_path: str):
26
+ loader = PyPDFLoader(file_path)
27
+ documents = loader.load()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  splitter = RecursiveCharacterTextSplitter(
30
  chunk_size=500,
31
  chunk_overlap=100
32
  )
33
 
34
+ chunks = splitter.split_documents(documents)
 
35
 
36
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
37
  vectorstore = FAISS.from_documents(chunks, embeddings)
 
38
 
39
+ vectorstore.save_local(VECTORSTORE_DIR)
40
  return vectorstore
41
 
42
 
43
+ def update_retriever(file_path: str):
44
+ """Rebuild vectorstore when a new document is uploaded."""
45
+ build_vectorstore(file_path)
46
 
47
 
48
  # ==============================
49
+ # RAG TOOL (HF SAFE)
50
  # ==============================
51
  def create_rag_tool():
52
 
53
  @tool
54
  def rag_search(query: str) -> str:
55
+ """
56
+ Retrieve relevant information from uploaded documents.
57
+ """
58
 
59
+ if not os.path.exists(os.path.join(VECTORSTORE_DIR, "index.faiss")):
60
+ return "No document has been uploaded yet."
61
 
62
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
63
+
64
+ vectorstore = FAISS.load_local(
65
+ VECTORSTORE_DIR,
66
+ embeddings,
67
+ allow_dangerous_deserialization=True
68
+ )
69
 
70
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
71
  docs = retriever.invoke(query)
72
 
73
  if not docs:
 
78
  return rag_search
79
 
80
 
81
+ # ==============================
82
+ # EXTRA TOOLS
83
+ # ==============================
84
 
85
  @tool
86
  def wikipedia_search(query: str) -> dict:
 
106
  try:
107
  return {"results": TavilySearchResults(max_results=5).run(query)}
108
  except Exception as e:
109
+ return {"error": str(e)}
110
+ ```