Krishna172912 committed on
Commit
8ddfaad
·
unverified ·
1 Parent(s): 170ed9f

Add files via upload

Browse files
Files changed (2) hide show
  1. back_end/agent/graph.py +193 -0
  2. back_end/agent/tools.py +334 -0
back_end/agent/graph.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tiktoken
2
+ from langchain_core.messages import trim_messages,HumanMessage, AIMessage
3
+ from langchain_google_genai import ChatGoogleGenerativeAI
4
+ from langgraph.graph import MessagesState,StateGraph, START, END, MessagesState
5
+ from langgraph.prebuilt import ToolNode, tools_condition
6
+ from pydantic import BaseModel, Field
7
+ from typing import Literal
8
+ import json
9
+ from pathlib import Path
10
+ from langchain_core.documents import Document
11
+ from agent.tools import get_code_search_tools
12
+
13
+ from config import (
14
+ SUPERVISOR_SYSTEM_PROMPT,
15
+ AGENT_SYSTEM_PROMPT_HEADER,
16
+ AGENT_SYSTEM_PROMPT_TOOLS,
17
+ AGENT_SYSTEM_PROMPT_TOOLS_NO_DB,
18
+ AGENT_SYSTEM_PROMPT_FOOTER,
19
+ )
20
+
21
# Local tokenizer used to estimate message sizes without a network round trip.
enc = tiktoken.get_encoding("cl100k_base")


def _tiktoken_counter(messages):
    """Estimate the total number of tokens in a sequence of chat messages.

    Args:
        messages: Iterable of messages. Each item is either a dict (read via
            its ``content`` / ``tool_calls`` keys) or an object exposing
            ``content`` and, optionally, ``tool_calls`` attributes.

    Returns:
        The summed ``cl100k_base`` token count of every message's content plus
        its JSON-serialized tool calls. This is an approximation of the real
        model's count, not an exact figure.
    """
    total = 0
    for m in messages:
        # Messages may arrive as plain dicts or as message objects.
        if isinstance(m, dict):
            content = m.get("content", "")
            tool_calls = m.get("tool_calls", [])
        else:
            content = getattr(m, "content", "")
            tool_calls = getattr(m, "tool_calls", [])

        # str() handles both plain-string content and list-of-parts content.
        text_to_encode = str(content)

        # Tool calls live outside ``content``; count them so they cannot slip
        # past the trimmer. default=str keeps a non-JSON-serializable argument
        # from aborting the whole count.
        if tool_calls:
            text_to_encode += json.dumps(tool_calls, default=str)

        # disallowed_special=() makes tiktoken treat strings such as
        # "<|endoftext|>" as ordinary text. With the default setting encode()
        # raises ValueError when the text contains a special-token string,
        # which repository files and tool outputs can legitimately include.
        total += len(enc.encode(text_to_encode, disallowed_special=()))

    return total
50
+
51
+ # ---------------------------------------------------------
52
+ # 1. AGENT NODE
53
+ # ---------------------------------------------------------
54
def initialize_agent(is_vector_db_created: bool, tools: list):
    """Create the researcher node: a tool-calling model behind a history trimmer.

    Args:
        is_vector_db_created: Selects which tools section of the system prompt
            is used (with or without the vector-database tools).
        tools: Tools bound to the model so it can emit tool calls.

    Returns:
        A graph node function taking a ``MessagesState`` and returning
        ``{"messages": [response]}`` with the model's reply.
    """
    # An earlier revision used model="gemini-3.1-flash-lite-preview" here.
    chat_model = ChatGoogleGenerativeAI(model="gemma-4-31b-it", temperature=0)
    tool_calling_model = chat_model.bind_tools(tools)

    history_trimmer = trim_messages(
        max_tokens=200000,
        strategy="last",
        # The model's own token counter would need an HTTP request per call,
        # which is too slow; the local tiktoken estimate is good enough.
        token_counter=_tiktoken_counter,
        # Never drop the system prompt / repo map.
        include_system=True,
        # Never cut a message in half.
        allow_partial=False,
    )

    def generate_query_or_respond(state: MessagesState):
        """Run one model turn: either request tool calls or answer directly."""
        tools_section = (
            AGENT_SYSTEM_PROMPT_TOOLS
            if is_vector_db_created
            else AGENT_SYSTEM_PROMPT_TOOLS_NO_DB
        )
        system_prompt = f"{AGENT_SYSTEM_PROMPT_HEADER}\n\n{tools_section}\n\n{AGENT_SYSTEM_PROMPT_FOOTER}"

        # Put the system prompt in front of the running conversation.
        conversation = [{"role": "system", "content": system_prompt}] + state["messages"]

        # Trim old history so the request stays under the token budget, then
        # call the model with the trimmed messages only.
        reply = tool_calling_model.invoke(history_trimmer.invoke(conversation))

        return {"messages": [reply]}

    return generate_query_or_respond
87
+
88
+
89
+ # ---------------------------------------------------------
90
+ # 2. THE LEAD ARCHITECT (SUPERVISOR NODE)
91
+ # ---------------------------------------------------------
92
+
93
+ # 1. Define the decision schema
94
# Structured verdict the supervisor model must return.
# NOTE(review): documented with comments rather than a class docstring on
# purpose - a docstring may end up in the schema sent to the model; confirm
# before adding one.
class SupervisorDecision(BaseModel):
    # Free-text justification written before the verdict.
    reasoning: str = Field(
        ...,
        description="1. What did the user ask? 2. What raw data is in the tool outputs? 3. Is the raw data sufficient to answer the user?",
    )
    # ACCEPT ends the run; REJECT sends the agent back to search again.
    status: Literal["ACCEPT", "REJECT"] = Field(
        ...,
        description="ACCEPT if the RAW TOOL OUTPUTS contain enough info to answer the user. REJECT if the agent needs to search for more specific files.",
    )
    # Final answer (on ACCEPT) or next-step instructions (on REJECT).
    content: str = Field(
        ...,
        description="If ACCEPT: Write the final, exhaustive response to the user. If REJECT: Write targeted instructions telling the agent what to search for next.",
    )
104
+
105
def initialize_supervisor():
    """Create the supervisor node that accepts or rejects the agent's research.

    Returns:
        A graph node function taking a ``MessagesState``. It returns an
        ``AIMessage`` carrying the final answer when the work is accepted, or a
        ``HumanMessage`` prefixed with ``"SUPERVISOR FEEDBACK: "`` when the
        agent must search further.
    """
    # Number of rejections after which the supervisor must stop rejecting.
    max_rejections = 2

    powerful_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.2, max_output_tokens=65536)
    powerful_agent = powerful_llm.with_structured_output(SupervisorDecision)

    def supervisor_node(state: MessagesState):
        """Review the conversation so far and emit an accept/reject message."""
        # Every earlier rejection left one feedback message in the history.
        iteration_count = sum(
            1
            for m in state["messages"]
            if isinstance(m, HumanMessage) and "SUPERVISOR FEEDBACK:" in m.content
        )
        must_accept = iteration_count >= max_rejections

        system_prompt = SUPERVISOR_SYSTEM_PROMPT

        # Once the rejection budget is spent, tell the model to accept.
        if must_accept:
            system_prompt += (
                "\n\n\n*** CRITICAL OVERRIDE ***\n"
                'You have rejected the researcher 2 times. You MUST now output status="ACCEPT" and synthesize the best possible final answer from ALL available evidence, explicitly noting what is implicit vs explicit. DO NOT REJECT.\n'
            )

        messages_to_evaluate = [{"role": "system", "content": system_prompt}] + state["messages"]
        decision = powerful_agent.invoke(messages_to_evaluate)

        # The prompt override is only advisory: the model can still answer
        # REJECT. Enforce the limit here as well, otherwise the
        # agent <-> supervisor loop never terminates on its own.
        if decision.status == "ACCEPT" or must_accept:
            return {"messages": [AIMessage(content=decision.content)]}
        return {"messages": [HumanMessage(content=f"SUPERVISOR FEEDBACK: {decision.content}")]}

    return supervisor_node
134
+
135
+ # --- Custom Router for the Supervisor ---
136
def route_supervisor(state: MessagesState):
    """Pick the next step after the supervisor has spoken.

    The supervisor emits an ``AIMessage`` when it accepts the work and a
    ``HumanMessage`` when it rejects it, so the type of the newest message is
    the verdict.

    Returns:
        ``END`` on acceptance, otherwise ``"agent"`` to send the feedback back
        to the researcher.
    """
    verdict = state["messages"][-1]
    return END if isinstance(verdict, AIMessage) else "agent"
143
+
144
+
145
+
146
+
147
def build_workflow(
    repo_storage: Path,
    is_vector_db_created: bool,
    all_splits: list[Document] = None,
    vector_db = None
):
    """Assemble and compile the agent / tools / supervisor graph.

    Args:
        repo_storage: Repository root passed to the code-search tools.
        is_vector_db_created: Forwarded to the tool factory and the agent.
        all_splits: Document chunks forwarded to the tool factory.
        vector_db: Vector store forwarded to the tool factory.

    Returns:
        The compiled graph.
    """
    tools = get_code_search_tools(repo_storage, is_vector_db_created, all_splits, vector_db)

    researcher = initialize_agent(is_vector_db_created, tools)
    reviewer = initialize_supervisor()

    graph = StateGraph(MessagesState)

    # Nodes.
    graph.add_node("agent", researcher)
    graph.add_node("tools", ToolNode(tools))
    graph.add_node("supervisor", reviewer)

    # Every run starts at the agent.
    graph.add_edge(START, "agent")

    # tools_condition inspects the agent's output: a tool call goes to the
    # "tools" node; anything else (its END branch) is remapped to the
    # supervisor so the answer is reviewed before the run can finish.
    graph.add_conditional_edges(
        "agent",
        tools_condition,
        {
            "tools": "tools",
            END: "supervisor",
        },
    )

    # Tool output always goes back to the agent, which reads it and decides
    # what to do next.
    graph.add_edge("tools", "agent")

    # The supervisor either finishes the run or sends the agent back to work.
    graph.add_conditional_edges("supervisor", route_supervisor, {"agent": "agent", END: END})

    return graph.compile()
back_end/agent/tools.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fnmatch
3
+ import itertools
4
+ from pathlib import Path
5
+ from langchain.tools import tool
6
+ from langchain_community.retrievers import BM25Retriever
7
+ from langchain_core.documents import Document
8
+ from langchain_core.tools import BaseTool
9
+ from config import EXCLUDE_PATTERNS
10
+
11
+
12
+
13
def get_code_search_tools(
    repo_storage: Path,
    is_vector_db_created: bool,
    all_splits: list[Document] = None,
    vector_db = None
)-> list[BaseTool]:
    """Build the repository-exploration tools handed to the agent.

    Args:
        repo_storage: Root directory of the repository the tools operate on.
        is_vector_db_created: When true, the semantic and keyword search tools
            are added to the returned list.
        all_splits: Document chunks used to build the BM25 keyword index.
        vector_db: Vector store exposing ``as_retriever`` for semantic search.

    Returns:
        The four filesystem tools, followed by the semantic and keyword search
        tools when ``is_vector_db_created`` is true.
    """
    # Resolve the root once and use it everywhere. Comparing resolved paths
    # against the unresolved ``repo_storage`` breaks ``relative_to`` and
    # ``is_relative_to`` whenever the root is relative or reached via a symlink.
    repo_root = Path(repo_storage).resolve()

    # Initialize BM25 only if we have vector data
    bm25_retriever = None
    if is_vector_db_created and all_splits:
        bm25_retriever = BM25Retriever.from_documents(all_splits, k=10)

    # -----------------------------------------------------------------------------
    # Tool 1: exact literal search over the files on disk
    # -----------------------------------------------------------------------------
    @tool
    def exact_code_search(search_pattern: str) -> str:
        """
        Search the codebase for an exact literal string.
        Use this tool FIRST when looking for exact function definitions, variable usages,
        specific syntax, or known class names.
        Input should be the exact string you want to find. (Note: Regex is NOT supported).
        """
        # An empty pattern is a substring of every line and would just return
        # the first MAX_LINES lines of the repository.
        if not search_pattern:
            return "Search error: search_pattern must not be empty."

        try:
            MAX_LINES = 350
            matches = []

            def is_valid_file(p: Path) -> bool:
                """Return True for regular, non-symlink files not excluded by EXCLUDE_PATTERNS."""
                if p.is_symlink() or not p.is_file():
                    return False

                # Forward slashes give consistent glob matching on every OS.
                path_str = p.as_posix()
                return not any(fnmatch.fnmatch(path_str, pattern) for pattern in EXCLUDE_PATTERNS)

            for file_path in repo_root.rglob("*"):
                if not is_valid_file(file_path):
                    continue

                try:
                    rel_path = file_path.relative_to(repo_root).as_posix()

                    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                        for i, line in enumerate(f, 1):
                            if search_pattern in line:
                                matches.append(f"{rel_path}:{i}:{line.strip()}")

                                if len(matches) >= MAX_LINES:
                                    break
                except OSError:
                    # Best effort: skip files that cannot be opened or read.
                    continue

                if len(matches) >= MAX_LINES:
                    break

            if not matches:
                return f"No exact matches found for '{search_pattern}'."

            output = "\n".join(matches)

            if len(matches) >= MAX_LINES:
                return f"--- EXACT MATCHES (first {MAX_LINES}) ---\n{output}\n\n... (Output truncated to save context)"
            return f"--- EXACT MATCHES ---\n{output}"

        except Exception as e:
            return f"Search error: {str(e)}"

    # -----------------------------------------------------------------------------
    # Tool 2: Retrieval using BM25
    # -----------------------------------------------------------------------------
    @tool
    def keyword_code_search(query: str, k: int = 5) -> str:
        """
        Search the codebase using exact keyword matching (BM25).
        Use this tool when looking for files containing specific keywords, error messages,
        or terminology where exact syntax matching isn't strictly required but specific words are important.
        Input should be a set of relevant keywords and the number of chunks (k) to return.
        """
        # The index is only built when document chunks were supplied.
        if bm25_retriever is None:
            return "Keyword search error: the keyword index is not available for this repository."

        try:
            # Update k dynamically so the agent can control how much context it gets
            bm25_retriever.k = k
            docs = bm25_retriever.invoke(query)

            if not docs:
                return f"No keyword matches found for '{query}'."

            formatted_chunks = []
            for doc in docs:
                source_file = doc.metadata.get("source", "Unknown File")
                formatted_chunks.append(f"--- File_Source: {source_file} ---\n{doc.page_content}")

            return "\n\n".join(formatted_chunks)

        except Exception as e:
            return f"Keyword search error: {str(e)}"

    # -----------------------------------------------------------------------------
    # Tool 3: Simple retrieval from the vector DB based on similarity search
    # -----------------------------------------------------------------------------
    @tool
    def semantic_code_search(query: str, k: int = 5) -> str:
        """
        Search the codebase using semantic vector embeddings.
        Use this tool to understand concepts, architecture, or ask natural language questions
        like "how does the database connection work?" or "where is the staging logic?"
        Do NOT use this for exact variable lookups or specific function signatures.
        Input should be a natural language query and the number of chunks (k) to return.
        """
        if vector_db is None:
            return "Semantic search error: the vector database is not available for this repository."

        try:
            # Build a retriever per call so the agent's requested 'k' is honoured.
            temp_dense_retriever = vector_db.as_retriever(
                search_type="similarity",
                search_kwargs={"k": k}
            )
            docs = temp_dense_retriever.invoke(query)

            if not docs:
                return f"No semantic matches found for '{query}'."

            formatted_chunks = []
            for doc in docs:
                source_file = doc.metadata.get("source", "Unknown File")
                formatted_chunks.append(f"--- File_Source: {source_file} ---\n{doc.page_content}")

            return "\n\n".join(formatted_chunks)

        except Exception as e:
            return f"Semantic search error: {str(e)}"

    # -----------------------------------------------------------------------------
    # Tool 4: get contents of a specified file
    # -----------------------------------------------------------------------------
    @tool
    def get_specific_file(file_path: str, start_line: int = None, end_line: int = None) -> str:
        """
        Get the text contents of a specific file from the repository.
        - If start_line and end_line are NOT provided, it returns the entire file (up to 50,000 bytes).
        - If start_line and end_line ARE provided (1-indexed), it returns only those specific lines, bypassing the file size limit.
        Use this tool to read entire small files, or to paginate through massive files by requesting specific line ranges.
        Input should be the exact file path, and optionally the start and end line numbers.
        """
        try:
            clean_path = file_path.lstrip('/')
            target_path = (repo_root / clean_path).resolve()

            # Security check: both sides are resolved, so "../" segments and
            # symlinks cannot escape the repository root.
            if not target_path.is_relative_to(repo_root):
                return "Error: Access denied. You cannot read files outside the repository root."

            absolute_file_path = str(target_path)

            # ---------------------------------------------------------
            # MODE 1: Specific Line Range Requested
            # ---------------------------------------------------------
            if start_line is not None or end_line is not None:
                # Handle cases where the LLM provides one but not the other
                start_line = start_line if start_line is not None else 1
                end_line = end_line if end_line is not None else (start_line + 300)

                if start_line < 1:
                    return "Error: start_line must be >= 1."
                if end_line < start_line:
                    return "Error: end_line must be >= start_line."

                # Protect context window: limit the maximum lines requested at once
                MAX_LINES_TO_READ = 500
                if (end_line - start_line + 1) > MAX_LINES_TO_READ:
                    return f"Error: You can only request up to {MAX_LINES_TO_READ} lines at a time to save context space."

                try:
                    with open(absolute_file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        # islice is 0-indexed with an exclusive stop, so
                        # (start_line - 1, end_line) yields lines
                        # start_line..end_line without loading the whole file.
                        lines = list(itertools.islice(f, start_line - 1, end_line))

                    if not lines:
                        return f"Error: The requested lines ({start_line}-{end_line}) are out of bounds for this file."

                    content = "".join(lines)
                    return f"--- File_Source: {file_path} (Lines {start_line}-{end_line}) ---\n{content}"

                except Exception as e:
                    return f"Error reading specific lines from {file_path}: {str(e)}"

            # ---------------------------------------------------------
            # MODE 2: Entire File Requested
            # ---------------------------------------------------------
            file_size = os.path.getsize(absolute_file_path)

            # Rough estimation: 1 byte is roughly 1 character in standard encoding
            if file_size > 50000:
                return (f"Error: The file '{file_path}' is too large ({file_size} bytes) to load entirely. "
                        f"Please use this tool again and provide `start_line` and `end_line` parameters to read specific sections or consider other tools such as exact_code_search.")

            with open(absolute_file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

            return f"--- File_Source: {file_path} ---\n{content}"

        except FileNotFoundError:
            return f"Error: The file '{file_path}' was not found. Please verify the path."
        except Exception as e:
            return f"Error loading {file_path}: {str(e)}"

    # -----------------------------------------------------------------------------
    # Tool 5: directory look up [like ls in terminal]
    # -----------------------------------------------------------------------------
    @tool
    def list_directory_contents(directory_path: str) -> str:
        """
        List the contents of a specific directory within the repository.
        Use this tool to explore the folder structure, see what files exist,
        and understand how the codebase is organized.
        Input should be a relative path from the repository root (e.g., 'repo_name/components','repo_name','repo_name/data/readmes/).
        """
        try:
            # Handle cases where the LLM passes absolute paths or starts with '/'
            clean_path = directory_path.lstrip('/')
            target_path = (repo_root / clean_path).resolve()

            # Prevent path traversal (e.g. '../../etc/passwd').
            if not target_path.is_relative_to(repo_root):
                return "Error: Access denied. You cannot read directories outside the repository root."

            if not target_path.exists():
                return f"Error: The directory '{directory_path}' does not exist in this repository."

            if not target_path.is_dir():
                return (f"Error: '{directory_path}' is a file, not a directory. "
                        f"If you want to read it, use the get_specific_file tool.")

            items = []
            for entry in os.scandir(target_path):
                # Skip OS metadata files
                if entry.name in ['.DS_Store', 'Thumbs.db']:
                    continue

                if entry.is_dir():
                    items.append(f"📁 [DIR] {entry.name}/")
                else:
                    # File sizes tell the agent whether a file is safe to read whole
                    size_kb = entry.stat().st_size / 1024
                    items.append(f"📄 [FILE] {entry.name} ({size_kb:.1f} KB)")

            # Directories first, then files; alphabetical within each group.
            items.sort(key=lambda x: (not x.startswith("📁"), x.lower()))

            if not items:
                return f"The directory '{directory_path}' is completely empty."

            # Context window protection
            MAX_ITEMS = 200
            if len(items) > MAX_ITEMS:
                truncated_count = len(items) - MAX_ITEMS
                items = items[:MAX_ITEMS]
                items.append(f"\n... (Output truncated: {truncated_count} more items not shown to save space) ...")

            return f"--- Contents of /{clean_path} ---\n" + "\n".join(items)

        except Exception as e:
            return f"An error occurred while reading the directory: {str(e)}"

    # -----------------------------------------------------------------------------
    # Tool 6: find_file_path_by_pattern
    # -----------------------------------------------------------------------------
    @tool
    def find_file_path_by_pattern(filename_pattern: str) -> str:
        """
        Search the repository for files matching a specific name or pattern.
        Use this tool when you know the name of the file or script you are looking for
        (e.g., 'build_npm_package.py' or '*.md').
        Input should be a filename or glob pattern.
        """
        try:
            matches = []
            # Matching is case-insensitive: both sides are lower-cased.
            wanted = filename_pattern.lower()

            for file_path in repo_root.rglob("*"):
                if file_path.is_file() and fnmatch.fnmatch(file_path.name.lower(), wanted):
                    matches.append(file_path.relative_to(repo_root).as_posix())

                    # Stop early once the output cap is reached.
                    if len(matches) >= 200:
                        output = '\n'.join(matches)
                        return f"--- FOUND FILES(truncated to 200) ---\n{output}"

            if not matches:
                return f"No files found matching the name '{filename_pattern}'."

            output = '\n'.join(matches)
            return f"--- FOUND FILES ---\n{output}"

        except Exception as e:
            return f"File search error: {str(e)}"

    tools = [exact_code_search, get_specific_file, list_directory_contents, find_file_path_by_pattern]

    # The retrieval tools only make sense when an index was built.
    if is_vector_db_created:
        tools.extend([semantic_code_search, keyword_code_search])

    return tools
334
+