lailaelkoussy committed on
Commit
110a02b
·
1 Parent(s): 5f8582c

update doc

Browse files
Files changed (1) hide show
  1. gradio_mcp_space.py +749 -152
gradio_mcp_space.py CHANGED
@@ -108,16 +108,42 @@ def initialize_knowledge_graph(
108
  @observe(as_type="tool")
109
  def get_node_info(node_id: str) -> str:
110
  """
111
- Get detailed information about a node in the knowledge graph.
112
-
113
- Returns information including the node's type, name, description,
114
- declared/called entities, and type-specific details.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  Args:
117
- node_id: The ID of the node to retrieve information for
118
 
119
  Returns:
120
- str: A formatted string with node information
 
 
 
 
 
121
  """
122
  if knowledge_graph is None:
123
  return "Error: Knowledge graph not initialized"
@@ -195,15 +221,37 @@ def get_node_info(node_id: str) -> str:
195
  @observe(as_type="tool")
196
  def get_node_edges(node_id: str) -> str:
197
  """
198
- List all incoming and outgoing edges for a node.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
- Shows relationships to other nodes in the knowledge graph.
 
 
201
 
202
  Args:
203
- node_id: The ID of the node whose edges to list
204
 
205
  Returns:
206
- str: A formatted string showing all edges
207
  """
208
  if knowledge_graph is None:
209
  return "Error: Knowledge graph not initialized"
@@ -247,17 +295,39 @@ Incoming Edges ({len(incoming)}):
247
  @observe(as_type="tool")
248
  def search_nodes(query: str, limit: int = 10, page: int = 1) -> str:
249
  """
250
- Search for chunk nodes in the knowledge graph by query string.
251
-
252
- Uses keyword search via the code index.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
  Args:
255
- query: The search string to match against code index
256
- limit: Maximum number of results to return per page (default: 10)
257
- page: Page number for pagination, 1-indexed (default: 1)
258
 
259
  Returns:
260
- str: A formatted string with search results
261
  """
262
  if knowledge_graph is None:
263
  return "Error: Knowledge graph not initialized"
@@ -321,18 +391,32 @@ def search_nodes(query: str, limit: int = 10, page: int = 1) -> str:
321
  @observe(as_type="tool")
322
  def get_graph_stats() -> str:
323
  """
324
- Get comprehensive statistics about the knowledge graph.
325
-
326
- Returns detailed information about the repository structure including:
327
- - Chunks: Code segments that represent portions of files (functions, classes, etc.)
328
- - Entities: Programming constructs like classes, functions, methods, variables
329
- - Files and directories in the repository
330
- - Relationships between different components
331
-
332
- For entity nodes, provides a breakdown by entity type (class, function, method, etc.).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
  Returns:
335
- str: A formatted string with comprehensive graph statistics
336
  """
337
  if knowledge_graph is None:
338
  return "Error: Knowledge graph not initialized"
@@ -436,17 +520,44 @@ Edge Relations:
436
  @observe(as_type="tool")
437
  def list_nodes_by_type(node_type: str, limit: int = 20, page: int = 1) -> str:
438
  """
439
- List nodes of a specific type in the knowledge graph.
440
-
441
- For entities, use entity_type (e.g., 'class', 'function', 'method').
442
- For other nodes, use node_type (e.g., 'file', 'chunk', 'directory').
443
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  Args:
445
- node_type: The type of nodes to list (e.g., 'function', 'class', 'file')
446
- limit: Maximum number of nodes to return (default: 20)
447
- page: Page number for pagination (default: 1)
 
448
  Returns:
449
- str: A formatted string with matching nodes
450
  """
451
  if knowledge_graph is None:
452
  return "Error: Knowledge graph not initialized"
@@ -537,15 +648,42 @@ def list_nodes_by_type(node_type: str, limit: int = 20, page: int = 1) -> str:
537
  @observe(as_type="tool")
538
  def get_neighbors(node_id: str, limit: int = 20, page: int = 1) -> str:
539
  """
540
- Retrieves all nodes directly connected to a given node.
541
-
542
- Retrieves neighboring nodes with their relationship types.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543
 
544
  Args:
545
- node_id: The ID of the node whose neighbors to retrieve
 
 
546
 
547
  Returns:
548
- str: A formatted string showing all neighbors
549
  """
550
  if knowledge_graph is None:
551
  return "Error: Knowledge graph not initialized"
@@ -614,15 +752,39 @@ def get_neighbors(node_id: str, limit: int = 20, page: int = 1) -> str:
614
  @observe(as_type="tool")
615
  def go_to_definition(entity_name: str) -> str:
616
  """
617
- Retrieve chunk node(s) where entity is declared or defined in the codebase.
618
-
619
- Locates and retrieves the declaration point for functions, classes, variables, etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
620
 
621
  Args:
622
- entity_name: The name of the entity to retrieve the definition for
623
 
624
  Returns:
625
- str: A formatted string with definition locations
626
  """
627
  if knowledge_graph is None:
628
  return "Error: Knowledge graph not initialized"
@@ -663,17 +825,46 @@ def go_to_definition(entity_name: str) -> str:
663
  @observe(as_type="tool")
664
  def find_usages(entity_name: str, limit: int = 20, page: int = 1) -> str:
665
  """
666
- Retrieve all usages or calls of an entity in the codebase.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
 
668
- Shows where functions, classes, variables, etc. are used.
 
 
669
 
670
  Args:
671
- entity_name: The name of the entity to retrieve usages for
672
- limit: Maximum number of usages to return per page (default: 20)
673
- page: Page number for pagination, 1-indexed (default: 1)
674
 
675
  Returns:
676
- str: A formatted string with usage locations
677
  """
678
  if knowledge_graph is None:
679
  return "Error: Knowledge graph not initialized"
@@ -738,15 +929,40 @@ def find_usages(entity_name: str, limit: int = 20, page: int = 1) -> str:
738
  @observe(as_type="tool")
739
  def get_file_structure(file_path: str) -> str:
740
  """
741
- Get an overview of the structure of a file.
742
-
743
- Shows chunks and declared entities within a specific file.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744
 
745
  Args:
746
- file_path: The path of the file to get the structure for
747
 
748
  Returns:
749
- str: A formatted string with file structure
750
  """
751
  if knowledge_graph is None:
752
  return "Error: Knowledge graph not initialized"
@@ -792,18 +1008,42 @@ def get_file_structure(file_path: str) -> str:
792
  @observe(as_type="tool")
793
  def get_related_chunks(chunk_id: str, relation_type: str = "calls", limit: int = 20, page: int = 1) -> str:
794
  """
795
- Retrieve chunks related to a given chunk by a specific relationship.
796
-
797
- Retrieve chunks connected via relationships like 'calls', 'contains', etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
798
 
799
  Args:
800
- chunk_id: The ID of the chunk to retrieve related chunks for
801
- relation_type: The type of relationship to filter by (default: 'calls')
802
- limit: Maximum number of results per page (default: 20)
803
- page: Page number for pagination, 1-indexed (default: 1)
804
 
805
  Returns:
806
- str: A formatted string with related chunks
807
  """
808
  if knowledge_graph is None:
809
  return "Error: Knowledge graph not initialized"
@@ -891,19 +1131,54 @@ def list_all_entities(
891
  called_in_repo: Optional[bool] = None
892
  ) -> str:
893
  """
894
- List all entities tracked in the knowledge graph with filtering and pagination options.
895
-
896
- Shows entity types, declaration counts, and usage counts.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897
 
898
  Args:
899
- limit: Maximum number of entities to return per page (default: 50)
900
- page: Page number for pagination, 1-indexed (default: 1)
901
- entity_type: Filter by entity type ('class', 'function', 'method', 'variable', 'parameter', 'function_call', 'method_call')
902
- declared_in_repo: If True, only return entities with declarations. If False, only entities without declarations. If None, return all.
903
- called_in_repo: If True, only return entities that have usages/calls in the repo. If False, only entities without usages. If None, return all.
904
 
905
  Returns:
906
- str: A formatted string with all entities for the requested page
907
  """
908
  if knowledge_graph is None:
909
  return "Error: Knowledge graph not initialized"
@@ -1037,16 +1312,41 @@ def list_all_entities(
1037
  @observe(as_type="tool")
1038
  def diff_chunks(node_id_1: str, node_id_2: str) -> str:
1039
  """
1040
- Show the diff between two code chunks or nodes.
1041
-
1042
- Compares the content of two nodes and shows differences.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1043
 
1044
  Args:
1045
- node_id_1: The ID of the first node/chunk
1046
- node_id_2: The ID of the second node/chunk
1047
 
1048
  Returns:
1049
- str: A formatted string with the diff
1050
  """
1051
  if knowledge_graph is None:
1052
  return "Error: Knowledge graph not initialized"
@@ -1080,16 +1380,41 @@ def diff_chunks(node_id_1: str, node_id_2: str) -> str:
1080
  @observe(as_type="tool")
1081
  def print_tree(root_id: str = "root", max_depth: int = 3) -> str:
1082
  """
1083
- Show a tree view of the repository structure.
1084
-
1085
- Displays a hierarchical tree starting from a given node.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1086
 
1087
  Args:
1088
- root_id: The node ID to start the tree from (default: 'root')
1089
- max_depth: Maximum depth to show (default: 3)
1090
 
1091
  Returns:
1092
- str: A formatted string with the tree structure
1093
  """
1094
  if knowledge_graph is None:
1095
  return "Error: Knowledge graph not initialized"
@@ -1144,15 +1469,40 @@ def print_tree(root_id: str = "root", max_depth: int = 3) -> str:
1144
  @observe(as_type="tool")
1145
  def entity_relationships(node_id: str) -> str:
1146
  """
1147
- Show all relationships for a given entity or node.
1148
-
1149
- Displays incoming and outgoing relationships with their types.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1150
 
1151
  Args:
1152
- node_id: The node/entity ID to explore relationships for
1153
 
1154
  Returns:
1155
- str: A formatted string with all relationships
1156
  """
1157
  if knowledge_graph is None:
1158
  return "Error: Knowledge graph not initialized"
@@ -1195,22 +1545,50 @@ def entity_relationships(node_id: str) -> str:
1195
  @observe(as_type="tool")
1196
  def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10, page: int = 1, partial_allowed: bool = True) -> str:
1197
  """
1198
- Search for nodes/entities by type and name substring with partial matching support.
1199
-
1200
- Filters nodes by type and searches for matching names. Supports partial matching
1201
- so searching for 'Embedding' will find 'BertEmbeddings', 'LlamaRotaryEmbedding', etc.
1202
-
1203
- For entities, searches by entity_type (e.g., 'class', 'function', 'method').
1204
- For other nodes, searches by node_type (e.g., 'file', 'chunk', 'directory').
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1205
 
1206
  Args:
1207
- node_type: Type of node/entity (e.g., 'function', 'class', 'file', 'chunk', 'directory')
1208
- name_query: Substring to match in the name (case-insensitive, supports partial matches)
1209
- limit: Maximum results to return (default: 10)
1210
- partial_allowed: Enable partial matching (default: True). If False, requires exact substring match.
 
1211
 
1212
  Returns:
1213
- str: A formatted string with matching nodes
1214
  """
1215
  if knowledge_graph is None:
1216
  return "Error: Knowledge graph not initialized"
@@ -1338,16 +1716,45 @@ def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10, pa
1338
  @observe(as_type="tool")
1339
  def get_chunk_context(node_id: str) -> str:
1340
  """
1341
- Get the full content of a code chunk along with its surrounding chunks.
1342
-
1343
- Returns the full content of the previous, current, and next chunks,
1344
- organized by file and joined together.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1345
 
1346
  Args:
1347
- node_id: The node/chunk ID to get context for
1348
 
1349
  Returns:
1350
- str: The full content of surrounding code chunks
1351
  """
1352
 
1353
 
@@ -1383,15 +1790,38 @@ def get_chunk_context(node_id: str) -> str:
1383
  @observe(as_type="tool")
1384
  def get_file_stats(path: str) -> str:
1385
  """
1386
- Get statistics for a file or directory.
 
 
 
 
1387
 
1388
- Shows number of entities, lines, chunks, etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1389
 
1390
  Args:
1391
- path: The file or directory path to get statistics for
1392
 
1393
  Returns:
1394
- str: A formatted string with file statistics
1395
  """
1396
  if knowledge_graph is None:
1397
  return "Error: Knowledge graph not initialized"
@@ -1444,17 +1874,46 @@ def get_file_stats(path: str) -> str:
1444
  @observe(as_type="tool")
1445
  def find_path(source_id: str, target_id: str, max_depth: int = 5) -> str:
1446
  """
1447
- Retrieve the shortest path between two nodes in the knowledge graph.
1448
-
1449
- Uses graph traversal to retrieve connections between nodes.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1450
 
1451
  Args:
1452
- source_id: The ID of the source node
1453
- target_id: The ID of the target node
1454
- max_depth: Maximum depth to search for a path (default: 5)
1455
 
1456
  Returns:
1457
- str: A formatted string showing the path
1458
  """
1459
  if knowledge_graph is None:
1460
  return "Error: Knowledge graph not initialized"
@@ -1493,17 +1952,50 @@ def find_path(source_id: str, target_id: str, max_depth: int = 5) -> str:
1493
  @observe(as_type="tool")
1494
  def get_subgraph(node_id: str, depth: int = 2, edge_types: Optional[str] = None) -> str:
1495
  """
1496
- Retrieve a subgraph around a node up to a specified depth.
1497
-
1498
- Optionally filters by edge types (comma-separated).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1499
 
1500
  Args:
1501
- node_id: The ID of the central node
1502
- depth: The depth/radius of the subgraph to retrieve (default: 2)
1503
- edge_types: Optional comma-separated list of edge types (e.g., 'calls,contains')
1504
 
1505
  Returns:
1506
- str: A formatted string describing the subgraph
1507
  """
1508
  if knowledge_graph is None:
1509
  return "Error: Knowledge graph not initialized"
@@ -1545,20 +2037,57 @@ def get_subgraph(node_id: str, depth: int = 2, edge_types: Optional[str] = None)
1545
  @observe(as_type="tool")
1546
  def list_files_in_directory(directory_path: str = "", pattern: str = "*", recursive: bool = True, limit: int = 50, page: int = 1) -> str:
1547
  """
1548
- List files in a directory with optional glob pattern matching.
1549
-
1550
- This provides hierarchical file listing, showing files within directories
1551
- rather than just top-level files. Supports glob patterns for filtering.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1552
 
1553
  Args:
1554
- directory_path: Path to the directory to list (empty string for root/all files)
1555
- pattern: Glob pattern to filter files (e.g., '*.py', 'test_*.py', '**/*.js')
1556
- recursive: Whether to search recursively in subdirectories (default: True)
1557
- limit: Maximum number of files to return per page (default: 50)
1558
- page: Page number for pagination, 1-indexed (default: 1)
1559
 
1560
  Returns:
1561
- str: A formatted string with matching files
1562
  """
1563
  if knowledge_graph is None:
1564
  return "Error: Knowledge graph not initialized"
@@ -1673,17 +2202,48 @@ def list_files_in_directory(directory_path: str = "", pattern: str = "*", recurs
1673
  @observe(as_type="tool")
1674
  def find_files_importing(module_or_entity: str, limit: int = 30, page: int = 1) -> str:
1675
  """
1676
- Retrieve all files that import a specific module or entity.
1677
-
1678
- Searches for import statements and usage patterns across the codebase.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1679
 
1680
  Args:
1681
- module_or_entity: The name of the module or entity to find imports of
1682
- limit: Maximum number of results to return per page (default: 30)
1683
- page: Page number for pagination, 1-indexed (default: 1)
1684
 
1685
  Returns:
1686
- str: A formatted string with files that import the specified module/entity
1687
  """
1688
  if knowledge_graph is None:
1689
  return "Error: Knowledge graph not initialized"
@@ -1801,17 +2361,54 @@ def find_files_importing(module_or_entity: str, limit: int = 30, page: int = 1)
1801
  @observe(as_type="tool")
1802
  def get_concept_overview(concept: str, limit: int = 15) -> str:
1803
  """
1804
- Get a high-level overview of a concept across the codebase.
1805
-
1806
- Combines multiple search strategies to provide a comprehensive view of how
1807
- a concept (like 'embeddings', 'authentication', 'caching') is implemented.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1808
 
1809
  Args:
1810
- concept: The concept to search for (e.g., 'embedding', 'authentication', 'cache')
1811
- limit: Maximum number of results per category (default: 15)
1812
 
1813
  Returns:
1814
- str: A formatted overview of the concept across the codebase
1815
  """
1816
  if knowledge_graph is None:
1817
  return "Error: Knowledge graph not initialized"
@@ -1984,8 +2581,8 @@ def create_gradio_app():
1984
  with gr.Column():
1985
  node_output = gr.Textbox(label="Node Information", lines=20, max_lines=30)
1986
  node_info_btn.click(fn=get_node_info, inputs=node_id_input, outputs=node_output)
1987
- node_edges_btn.click(fn=get_node_edges, inputs=node_id_input, outputs=node_output)
1988
  gr.Markdown("#Get Node Info:" + _tool_doc_md(get_node_info))
 
1989
  gr.Markdown("#Get Node Edges:" + _tool_doc_md(get_node_edges))
1990
 
1991
  with gr.Tab("🏗️ Structure"):
 
108
  @observe(as_type="tool")
109
  def get_node_info(node_id: str) -> str:
110
  """
111
+ Retrieve comprehensive details about any node in the Transformers library knowledge graph.
112
+
113
+ PURPOSE:
114
+ Use this tool to inspect the full metadata and content of a specific node when you need
115
+ to understand what a particular code element contains, what entities it declares or calls,
116
+ and how it fits into the codebase structure.
117
+
118
+ WHEN TO USE:
119
+ - After finding a node ID from search_nodes, list_nodes_by_type, or get_neighbors
120
+ - To see the actual code content of a chunk node
121
+ - To understand what entities (classes, functions, variables) are declared in a file or chunk
122
+ - To examine entity metadata including aliases, declaration locations, and usage locations
123
+ - To get file metadata like language and path information
124
+
125
+ NODE TYPES SUPPORTED:
126
+ - 'chunk': Code segments with content, declared/called entities, and file position
127
+ - 'file': Source files with path, language, and entity summaries
128
+ - 'directory': Folder nodes with path information
129
+ - 'entity': Programming constructs (classes, functions, methods, variables) with declaration/usage tracking
130
+ - 'repo': Repository root node
131
+
132
+ TYPICAL WORKFLOW:
133
+ 1. search_nodes("attention mechanism") -> get node IDs
134
+ 2. get_node_info(node_id) -> see full content and metadata
135
+ 3. get_neighbors(node_id) or find_usages(entity_name) -> explore relationships
136
 
137
  Args:
138
+ node_id: The unique identifier of the node (e.g., 'src/transformers/models/bert/modeling_bert.py::chunk_3' for chunks, or 'BertModel' for entities)
139
 
140
  Returns:
141
+ str: Formatted details including node type, name, description, content (for chunks), declared entities, called entities, and type-specific metadata
142
+
143
+ Example node_ids:
144
+ - Chunk: 'src/transformers/models/bert/modeling_bert.py::chunk_5'
145
+ - File: 'src/transformers/models/bert/modeling_bert.py'
146
+ - Entity: 'BertModel', 'forward', 'attention_mask'
147
  """
148
  if knowledge_graph is None:
149
  return "Error: Knowledge graph not initialized"
 
221
  @observe(as_type="tool")
222
  def get_node_edges(node_id: str) -> str:
223
  """
224
+ List all graph edges (relationships) connected to a specific node in the knowledge graph.
225
+
226
+ PURPOSE:
227
+ Use this tool to understand how a node is connected to other parts of the codebase.
228
+ Reveals the dependency structure and relationships that link code elements together.
229
+
230
+ WHEN TO USE:
231
+ - To discover what code calls or depends on a specific function/class
232
+ - To find parent-child relationships (e.g., which file contains a chunk)
233
+ - To trace declaration and usage patterns through the codebase
234
+ - To understand the connectivity of an entity in the dependency graph
235
+ - When you need a raw view of all relationships without filtering
236
+
237
+ EDGE TYPES YOU'LL SEE:
238
+ - 'contains': Parent-child (file→chunk, directory→file, repo→directory)
239
+ - 'calls': Entity usage relationships (chunk→entity it calls)
240
+ - 'declares': Entity declaration relationships (chunk→entity it defines)
241
+
242
+ DIRECTION MEANINGS:
243
+ - Incoming edges (←): Other nodes pointing TO this node (e.g., "who calls me?")
244
+ - Outgoing edges (→): This node pointing TO others (e.g., "what do I call?")
245
 
246
+ COMPARISON WITH get_neighbors:
247
+ - get_node_edges: Shows edge metadata and direction, raw relationship view
248
+ - get_neighbors: Shows neighboring node details, easier for exploration
249
 
250
  Args:
251
+ node_id: The unique identifier of the node to inspect edges for
252
 
253
  Returns:
254
+ str: List of incoming and outgoing edges with source/target node IDs and relationship types
255
  """
256
  if knowledge_graph is None:
257
  return "Error: Knowledge graph not initialized"
 
295
  @observe(as_type="tool")
296
  def search_nodes(query: str, limit: int = 10, page: int = 1) -> str:
297
  """
298
+ Search the Transformers codebase using keyword matching against code content and metadata.
299
+
300
+ PURPOSE:
301
+ This is your PRIMARY SEARCH TOOL for exploring the codebase. Use it to find relevant
302
+ code chunks based on natural language queries, function names, class names, comments,
303
+ or any text that might appear in the source code.
304
+
305
+ WHEN TO USE:
306
+ - FIRST STEP when investigating any topic in the Transformers library
307
+ - To find implementations of specific features (e.g., "rotary embeddings", "flash attention")
308
+ - To locate code by function/class name when you don't have the exact node ID
309
+ - To discover code related to a concept (e.g., "gradient checkpointing", "tokenization")
310
+ - When you don't know where something is implemented
311
+
312
+ SEARCH TIPS:
313
+ - Use specific technical terms: "rope embedding" rather than just "embedding"
314
+ - Include class/function names if known: "BertSelfAttention forward"
315
+ - Try multiple related queries if first results aren't satisfactory
316
+ - Results are ranked by relevance to your query
317
+
318
+ TYPICAL WORKFLOW:
319
+ 1. search_nodes("attention mask handling") -> find relevant chunks
320
+ 2. get_node_info(chunk_id) -> examine the code content
321
+ 3. get_chunk_context(chunk_id) -> see surrounding code for fuller picture
322
+ 4. go_to_definition(entity_name) -> find where an entity is defined
323
 
324
  Args:
325
+ query: Search terms to match against code content. Can be natural language, function names, class names, or code snippets. More specific queries yield better results.
326
+ limit: Results per page (default: 10, max recommended: 50). Use smaller limits for faster responses.
327
+ page: Page number starting from 1. Use pagination to browse through many results.
328
 
329
  Returns:
330
+ str: Ranked list of matching code chunks with IDs and content previews. Use the returned IDs with get_node_info or get_chunk_context for full details.
331
  """
332
  if knowledge_graph is None:
333
  return "Error: Knowledge graph not initialized"
 
391
  @observe(as_type="tool")
392
  def get_graph_stats() -> str:
393
  """
394
+ Get a comprehensive statistical overview of the Transformers library knowledge graph.
395
+
396
+ PURPOSE:
397
+ Use this tool to understand the scope and structure of the knowledge graph.
398
+ Provides counts and breakdowns of all node types, entity types, and relationship types.
399
+
400
+ WHEN TO USE:
401
+ - At the START of an exploration session to understand the codebase scope
402
+ - To learn what types of entities and relationships are available for querying
403
+ - To understand the terminology used in this knowledge graph (chunks, entities, edges)
404
+ - When you need to report on the overall structure of the Transformers library
405
+
406
+ WHAT YOU'LL LEARN:
407
+ - Total number of nodes and edges in the graph
408
+ - Breakdown of node types (chunks, files, directories, entities)
409
+ - Entity type distribution (classes, functions, methods, variables, etc.)
410
+ - Edge relationship types (contains, calls, declares)
411
+ - Definitions of key concepts used throughout the tools
412
+
413
+ GRAPH TERMINOLOGY:
414
+ - Chunks: Logical code segments (a function body, a class definition, etc.)
415
+ - Entities: Named programming constructs tracked across the codebase
416
+ - Edges: Relationships connecting nodes (contains, calls, declares)
417
 
418
  Returns:
419
+ str: Detailed statistics including node counts by type, entity breakdown, edge relation counts, and concept definitions to help you use other tools effectively.
420
  """
421
  if knowledge_graph is None:
422
  return "Error: Knowledge graph not initialized"
 
520
  @observe(as_type="tool")
521
  def list_nodes_by_type(node_type: str, limit: int = 20, page: int = 1) -> str:
522
  """
523
+ List all nodes of a specific type in the Transformers knowledge graph with pagination.
524
+
525
+ PURPOSE:
526
+ Use this tool to browse and discover nodes by their type. Helpful when you want to
527
+ see what classes, functions, files, or other constructs exist in the codebase.
528
+
529
+ WHEN TO USE:
530
+ - To get a list of all classes in the Transformers library: node_type='class'
531
+ - To see all Python files: node_type='file'
532
+ - To list all functions: node_type='function'
533
+ - To browse all methods: node_type='method'
534
+ - When you need to find node IDs for further exploration
535
+
536
+ VALID node_type VALUES:
537
+ For entities (programming constructs):
538
+ - 'class': Class definitions (e.g., BertModel, GPT2LMHeadModel)
539
+ - 'function': Standalone function definitions
540
+ - 'method': Class method definitions
541
+ - 'variable': Variable declarations
542
+ - 'parameter': Function/method parameters
543
+
544
+ For structural nodes:
545
+ - 'file': Source code files
546
+ - 'chunk': Code segments within files
547
+ - 'directory': Folder structure nodes
548
+ - 'repo': Repository root (typically one)
549
+
550
+ COMPARISON WITH search_by_type_and_name:
551
+ - list_nodes_by_type: Browse ALL nodes of a type (no name filter)
552
+ - search_by_type_and_name: Filter by type AND search by name substring
553
+
554
  Args:
555
+ node_type: The type to filter by. Use lowercase: 'class', 'function', 'method', 'file', 'chunk', 'directory'
556
+ limit: Maximum results per page (default: 20). Increase for broader browsing.
557
+ page: Page number starting from 1 for pagination through large result sets.
558
+
559
  Returns:
560
+ str: Alphabetically sorted list of matching nodes with their IDs and types. Use IDs with get_node_info for details.
561
  """
562
  if knowledge_graph is None:
563
  return "Error: Knowledge graph not initialized"
 
648
  @observe(as_type="tool")
649
  def get_neighbors(node_id: str, limit: int = 20, page: int = 1) -> str:
650
  """
651
+ Get all nodes directly connected to a given node with their relationship information.
652
+
653
+ PURPOSE:
654
+ Use this tool to explore the local neighborhood of any node in the knowledge graph.
655
+ Shows what's connected to a node and how, making it easy to navigate the codebase structure.
656
+
657
+ WHEN TO USE:
658
+ - To explore what a node is connected to (files, chunks, entities)
659
+ - To navigate from one code element to related elements
660
+ - To understand the local structure around a specific node
661
+ - After using get_node_info when you want to explore connected nodes
662
+ - To discover related code without knowing exact names
663
+
664
+ WHAT YOU'LL SEE:
665
+ - Neighbor node IDs and names
666
+ - Node types (chunk, file, entity, etc.)
667
+ - Relationship direction (→ outgoing, ← incoming)
668
+ - Relationship type (contains, calls, declares)
669
+
670
+ TYPICAL NAVIGATION PATTERNS:
671
+ - From a file: see its chunks and declared entities
672
+ - From a chunk: see entities it declares/calls and its parent file
673
+ - From an entity: see chunks that declare or call it
674
+ - From a directory: see contained files and subdirectories
675
+
676
+ COMPARISON WITH get_node_edges:
677
+ - get_neighbors: Shows neighboring NODE details (name, type) - better for exploration
678
+ - get_node_edges: Shows raw EDGE information - better for understanding relationships
679
 
680
  Args:
681
+ node_id: The ID of the node to explore neighbors for
682
+ limit: Maximum neighbors to return per page (default: 20)
683
+ page: Page number for pagination when node has many connections
684
 
685
  Returns:
686
+ str: List of connected nodes with their IDs, names, types, and the relationships connecting them
687
  """
688
  if knowledge_graph is None:
689
  return "Error: Knowledge graph not initialized"
 
752
  @observe(as_type="tool")
753
  def go_to_definition(entity_name: str) -> str:
754
  """
755
+ Jump to the source code location(s) where an entity is defined/declared.
756
+
757
+ PURPOSE:
758
+ Use this tool to find WHERE in the codebase a class, function, method, or variable
759
+ is defined. Returns the actual code content of the definition along with file location.
760
+
761
+ WHEN TO USE:
762
+ - To see the implementation of a class like 'BertModel' or 'GPT2Attention'
763
+ - To find where a function is defined when you know its name
764
+ - To examine the source code of any entity found through search or listing
765
+ - When you need to understand HOW something is implemented (not just WHERE it's used)
766
+ - To get the actual code definition for analysis or explanation
767
+
768
+ WHAT YOU'LL GET:
769
+ - Entity type (class, function, method, variable)
770
+ - Data type if available
771
+ - List of all locations where the entity is declared (some entities may be defined in multiple places)
772
+ - For each location: file path, chunk order, and FULL CODE CONTENT
773
+
774
+ TYPICAL WORKFLOW:
775
+ 1. search_nodes("attention") -> find entity names
776
+ 2. go_to_definition("BertSelfAttention") -> see the class implementation
777
+ 3. find_usages("BertSelfAttention") -> see where it's used
778
+
779
+ COMPARISON WITH find_usages:
780
+ - go_to_definition: Shows WHERE entity is DEFINED (the implementation)
781
+ - find_usages: Shows WHERE entity is USED/CALLED (the consumers)
782
 
783
  Args:
784
+ entity_name: Exact name of the entity (case-sensitive). Examples: 'BertModel', 'forward', 'attention_mask', 'get_extended_attention_mask'
785
 
786
  Returns:
787
+ str: Entity type, file location(s), and complete source code of the definition(s). Returns error if entity not found.
788
  """
789
  if knowledge_graph is None:
790
  return "Error: Knowledge graph not initialized"
 
825
  @observe(as_type="tool")
826
  def find_usages(entity_name: str, limit: int = 20, page: int = 1) -> str:
827
  """
828
+ Find all locations in the codebase where an entity is used or called.
829
+
830
+ PURPOSE:
831
+ Use this tool to understand the impact and usage patterns of any entity.
832
+ Shows every place where a class is instantiated, a function is called,
833
+ or a variable is referenced throughout the Transformers library.
834
+
835
+ WHEN TO USE:
836
+ - To understand how widely used a class or function is
837
+ - To see usage examples of a particular API or function
838
+ - To assess the impact of changing an entity (who depends on it?)
839
+ - To learn how to use a class/function by seeing real examples
840
+ - To trace data flow through the codebase
841
+
842
+ WHAT YOU'LL GET:
843
+ - Total count of usage locations
844
+ - For each usage: file path, chunk position, and full code context showing the usage
845
+ - Paginated results for entities with many usages
846
+
847
+ TYPICAL WORKFLOWS:
848
+
849
+ Impact Analysis:
850
+ 1. go_to_definition("deprecated_function") -> understand what it does
851
+ 2. find_usages("deprecated_function") -> see all code that needs updating
852
+
853
+ Learning by Example:
854
+ 1. list_nodes_by_type('class') -> find interesting classes
855
+ 2. find_usages("BertModel") -> see how it's instantiated and used
856
 
857
+ COMPARISON WITH go_to_definition:
858
+ - find_usages: WHERE is this entity CALLED/USED (consumers)
859
+ - go_to_definition: WHERE is this entity DEFINED (implementation)
860
 
861
  Args:
862
+ entity_name: Exact name of the entity to find usages for (case-sensitive)
863
+ limit: Usages per page (default: 20). Many popular classes have 100+ usages.
864
+ page: Page number for pagination (starts at 1)
865
 
866
  Returns:
867
+ str: List of code chunks that use this entity, with file paths and full code content showing the usage in context
868
  """
869
  if knowledge_graph is None:
870
  return "Error: Knowledge graph not initialized"
 
929
  @observe(as_type="tool")
930
  def get_file_structure(file_path: str) -> str:
931
  """
932
+ Get a structural overview of a source file showing its chunks and declared entities.
933
+
934
+ PURPOSE:
935
+ Use this tool to understand the organization of a specific file. Shows what classes,
936
+ functions, and other entities are defined in the file, plus how the file is divided into chunks.
937
+
938
+ WHEN TO USE:
939
+ - To get a table of contents for a file before diving into specifics
940
+ - To see what classes and functions a file defines
941
+ - To understand how code is organized within a file
942
+ - To find chunk IDs for further exploration with get_node_info or get_chunk_context
943
+ - When you know the file path but need to understand its contents
944
+
945
+ WHAT YOU'LL SEE:
946
+ - File path and detected programming language
947
+ - Total number of code chunks in the file
948
+ - List of declared entities (classes, functions, methods, variables) with their types
949
+ - Ordered list of chunks with their IDs and descriptions
950
+
951
+ HOW TO GET FILE PATHS:
952
+ - Use list_files_in_directory() to browse files
953
+ - Use search_nodes() and look at file paths in results
954
+ - Use list_nodes_by_type('file') to get file node IDs (which are the paths)
955
+
956
+ TYPICAL WORKFLOW:
957
+ 1. list_files_in_directory('src/transformers/models/bert') -> find files
958
+ 2. get_file_structure('src/transformers/models/bert/modeling_bert.py') -> see structure
959
+ 3. get_node_info(chunk_id) -> examine specific code chunks
960
 
961
  Args:
962
+ file_path: The full path to the file (e.g., 'src/transformers/models/bert/modeling_bert.py'). Must match exactly as stored in the knowledge graph.
963
 
964
  Returns:
965
+ str: File overview including language, chunk count, declared entities list, and chunk descriptions
966
  """
967
  if knowledge_graph is None:
968
  return "Error: Knowledge graph not initialized"
 
1008
  @observe(as_type="tool")
1009
  def get_related_chunks(chunk_id: str, relation_type: str = "calls", limit: int = 20, page: int = 1) -> str:
1010
  """
1011
+ Find code chunks connected to a given chunk through a specific relationship type.
1012
+
1013
+ PURPOSE:
1014
+ Use this tool to trace code dependencies by following relationship edges from a chunk.
1015
+ Helps understand what code a chunk depends on or what depends on it.
1016
+
1017
+ WHEN TO USE:
1018
+ - To find what entities/code a chunk calls or uses (relation_type='calls')
1019
+ - To trace dependencies from a specific piece of code
1020
+ - To explore the call graph emanating from a chunk
1021
+ - When you have a chunk ID and want to see connected code
1022
+
1023
+ RELATIONSHIP TYPES:
1024
+ - 'calls': Entities/chunks that this chunk calls or references (most common)
1025
+ - 'contains': Child nodes contained by this node (for files/directories)
1026
+ - 'declares': Entities declared by this chunk
1027
+ - 'all' or '': Get all outgoing relationships regardless of type
1028
+
1029
+ TYPICAL WORKFLOW:
1030
+ 1. search_nodes("BertAttention forward") -> find a chunk
1031
+ 2. get_related_chunks(chunk_id, 'calls') -> see what it calls
1032
+ 3. get_node_info(related_chunk_id) -> examine called code
1033
+
1034
+ COMPARISON WITH OTHER TOOLS:
1035
+ - get_neighbors: All connected nodes (any direction, any type)
1036
+ - get_related_chunks: Outgoing edges only, filtered by relationship type
1037
+ - entity_relationships: Focused on entity nodes and their relationships
1038
 
1039
  Args:
1040
+ chunk_id: The ID of the chunk to explore from (e.g., 'src/transformers/models/bert/modeling_bert.py::chunk_5')
1041
+ relation_type: Filter by relationship type: 'calls', 'contains', 'declares', or 'all' for everything (default: 'calls')
1042
+ limit: Maximum results per page (default: 20)
1043
+ page: Page number for pagination (default: 1)
1044
 
1045
  Returns:
1046
+ str: List of related chunks with their IDs, file paths, and entity names involved in the relationship
1047
  """
1048
  if knowledge_graph is None:
1049
  return "Error: Knowledge graph not initialized"
 
1131
  called_in_repo: Optional[bool] = None
1132
  ) -> str:
1133
  """
1134
+ Browse all programming entities (classes, functions, methods, variables) tracked in the knowledge graph.
1135
+
1136
+ PURPOSE:
1137
+ Use this tool to explore the full inventory of code entities in the Transformers library.
1138
+ Supports filtering by type and usage patterns, making it powerful for targeted exploration.
1139
+
1140
+ WHEN TO USE:
1141
+ - To browse all classes, functions, or methods in the codebase
1142
+ - To find entities that are defined but never used (dead code analysis)
1143
+ - To find external entities that are called but not defined in the repo
1144
+ - To get an overview of entity distribution in the codebase
1145
+ - When you need entity names for use with go_to_definition or find_usages
1146
+
1147
+ FILTERING OPTIONS:
1148
+
1149
+ By entity_type:
1150
+ - 'class': Class definitions (BertModel, GPT2Config, etc.)
1151
+ - 'function': Standalone functions
1152
+ - 'method': Class methods
1153
+ - 'variable': Variable declarations
1154
+ - 'parameter': Function/method parameters
1155
+ - None: All entity types
1156
+
1157
+ By declaration status (declared_in_repo):
1158
+ - True: Only entities DEFINED in this repo (have source code)
1159
+ - False: Only external entities (imported from other packages)
1160
+ - None: All entities
1161
+
1162
+ By usage status (called_in_repo):
1163
+ - True: Only entities that ARE USED somewhere in the code
1164
+ - False: Only entities that are NEVER USED (potential dead code)
1165
+ - None: All entities
1166
+
1167
+ USEFUL FILTER COMBINATIONS:
1168
+ - All classes: entity_type='class'
1169
+ - Defined classes: entity_type='class', declared_in_repo=True
1170
+ - Unused functions: entity_type='function', called_in_repo=False
1171
+ - External dependencies: declared_in_repo=False, called_in_repo=True
1172
 
1173
  Args:
1174
+ limit: Entities per page (default: 50). Use larger values for comprehensive listings.
1175
+ page: Page number starting from 1 for pagination
1176
+ entity_type: Filter by type: 'class', 'function', 'method', 'variable', 'parameter', or None for all
1177
+ declared_in_repo: True=defined in repo, False=external only, None=all
1178
+ called_in_repo: True=has usages, False=never used, None=all
1179
 
1180
  Returns:
1181
+ str: List of entities with their types, declaration count, and usage count. Use entity names with go_to_definition or find_usages.
1182
  """
1183
  if knowledge_graph is None:
1184
  return "Error: Knowledge graph not initialized"
 
1312
  @observe(as_type="tool")
1313
  def diff_chunks(node_id_1: str, node_id_2: str) -> str:
1314
  """
1315
+ Compare two code chunks and show their differences in unified diff format.
1316
+
1317
+ PURPOSE:
1318
+ Use this tool to compare two pieces of code line by line. Shows exactly what's
1319
+ different between them using standard unified diff format (like git diff).
1320
+
1321
+ WHEN TO USE:
1322
+ - To compare similar implementations (e.g., two attention mechanisms)
1323
+ - To understand differences between related classes or functions
1324
+ - To analyze variations in code patterns across the codebase
1325
+ - To compare two versions or implementations of similar functionality
1326
+ - When you suspect code duplication and want to see exact differences
1327
+
1328
+ DIFF FORMAT:
1329
+ - Lines starting with '-' are only in the first chunk
1330
+ - Lines starting with '+' are only in the second chunk
1331
+ - Lines starting with a space (no '+' or '-' prefix) are context lines common to both chunks
1332
+ - @@ markers show line number context
1333
+
1334
+ TYPICAL WORKFLOW:
1335
+ 1. search_nodes("attention") -> find attention implementations
1336
+ 2. Get chunk IDs from two different attention classes
1337
+ 3. diff_chunks(chunk_id_1, chunk_id_2) -> compare implementations
1338
+
1339
+ COMPARISON IDEAS:
1340
+ - BertAttention vs GPT2Attention
1341
+ - Different forward() implementations
1342
+ - Similar utility functions in different modules
1343
 
1344
  Args:
1345
+ node_id_1: ID of the first chunk/node to compare
1346
+ node_id_2: ID of the second chunk/node to compare
1347
 
1348
  Returns:
1349
+ str: Unified diff output showing line-by-line differences. Returns 'No differences found' if chunks are identical.
1350
  """
1351
  if knowledge_graph is None:
1352
  return "Error: Knowledge graph not initialized"
 
1380
  @observe(as_type="tool")
1381
  def print_tree(root_id: str = "root", max_depth: int = 3) -> str:
1382
  """
1383
+ Display a hierarchical tree view of the repository structure starting from any node.
1384
+
1385
+ PURPOSE:
1386
+ Use this tool to visualize the structure of the codebase. Shows parent-child relationships
1387
+ in a familiar tree format, helping you understand how files and directories are organized.
1388
+
1389
+ WHEN TO USE:
1390
+ - To explore the directory structure of the Transformers repository
1391
+ - To see what's inside a specific directory (use directory as root_id)
1392
+ - To understand the file organization for a component
1393
+ - To get an overview of the codebase hierarchy
1394
+ - When you need to understand where files are located
1395
+
1396
+ TREE VISUALIZATION:
1397
+ - Each level shows node name and type (repo, directory, file, chunk)
1398
+ - Indentation represents depth in the hierarchy
1399
+ - Children are limited to prevent overwhelming output
1400
+
1401
+ TIPS:
1402
+ - Start with max_depth=2 for a high-level overview
1403
+ - Increase max_depth to see more detail (but output gets larger)
1404
+ - Use a directory path as root_id to focus on a specific area
1405
+ - Use list_files_in_directory for more detailed file listings
1406
+
1407
+ TYPICAL USAGE:
1408
+ - print_tree('root', max_depth=2) -> see top-level structure
1409
+ - print_tree('src/transformers/models', max_depth=2) -> see model organization
1410
+ - print_tree('src/transformers/models/bert', max_depth=3) -> see bert module structure
1411
 
1412
  Args:
1413
+ root_id: Starting node ID. Use 'root' for repository root, or a directory/file path to start from a specific location.
1414
+ max_depth: How many levels deep to show (default: 3). Higher values show more detail but larger output.
1415
 
1416
  Returns:
1417
+ str: ASCII tree visualization showing the hierarchical structure with node names and types
1418
  """
1419
  if knowledge_graph is None:
1420
  return "Error: Knowledge graph not initialized"
 
1469
  @observe(as_type="tool")
1470
  def entity_relationships(node_id: str) -> str:
1471
  """
1472
+ Display all incoming and outgoing relationships for any node, with relationship types.
1473
+
1474
+ PURPOSE:
1475
+ Use this tool to get a complete picture of how a node connects to the rest of the
1476
+ knowledge graph. Shows both what points TO this node and what this node points TO.
1477
+
1478
+ WHEN TO USE:
1479
+ - To understand all dependencies of an entity
1480
+ - To see what declares or calls a specific entity
1481
+ - To trace the full relationship network around any node
1482
+ - When you need more detail than get_neighbors provides about relationship types
1483
+ - For entity-centric analysis (understanding a class or function's connections)
1484
+
1485
+ WHAT YOU'LL SEE:
1486
+ - Incoming relationships: Other nodes that have edges pointing TO this node
1487
+ (e.g., chunks that CALL this function, files that CONTAIN this chunk)
1488
+ - Outgoing relationships: This node's edges pointing TO other nodes
1489
+ (e.g., entities this chunk CALLS, chunks this file CONTAINS)
1490
+ - Relationship types for each edge (calls, declares, contains)
1491
+
1492
+ COMPARISON WITH SIMILAR TOOLS:
1493
+ - get_node_edges: Same information but different formatting
1494
+ - get_neighbors: Shows neighbor node details, not edge details
1495
+ - get_related_chunks: Filtered by relationship type, chunks only
1496
+
1497
+ TYPICAL WORKFLOW:
1498
+ 1. go_to_definition("BertModel") -> find entity
1499
+ 2. entity_relationships("BertModel") -> see what calls/uses BertModel
1500
 
1501
  Args:
1502
+ node_id: The ID of any node (entity, chunk, file, directory)
1503
 
1504
  Returns:
1505
+ str: Complete list of incoming and outgoing relationships with source/target IDs and relationship types
1506
  """
1507
  if knowledge_graph is None:
1508
  return "Error: Knowledge graph not initialized"
 
1545
  @observe(as_type="tool")
1546
  def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10, page: int = 1, partial_allowed: bool = True) -> str:
1547
  """
1548
+ Search for nodes by combining type filtering with name pattern matching.
1549
+
1550
+ PURPOSE:
1551
+ Use this tool for precise, targeted searches when you know the type of node you're looking
1552
+ for and have a partial name. More efficient than list_nodes_by_type when you have name hints.
1553
+
1554
+ WHEN TO USE:
1555
+ - To find all classes containing 'Attention': search_by_type_and_name('class', 'Attention')
1556
+ - To find functions with 'forward' in name: search_by_type_and_name('function', 'forward')
1557
+ - To find files named 'config': search_by_type_and_name('file', 'config')
1558
+ - When you know the type AND have a partial name to search for
1559
+ - For pattern-based discovery of related components
1560
+
1561
+ SEARCH BEHAVIOR:
1562
+ - Case-insensitive matching
1563
+ - partial_allowed=True (default): Fuzzy matching, finds 'BertEmbeddings' when searching 'Embed'
1564
+ - partial_allowed=False: Requires exact substring match
1565
+ - Results sorted by match quality (exact matches first, then substring, then fuzzy)
1566
+
1567
+ VALID node_type VALUES:
1568
+ For entities: 'class', 'function', 'method', 'variable', 'parameter'
1569
+ For structural: 'file', 'chunk', 'directory'
1570
+
1571
+ SEARCH EXAMPLES:
1572
+ - All Attention classes: search_by_type_and_name('class', 'Attention')
1573
+ - All Embedding classes: search_by_type_and_name('class', 'Embedding')
1574
+ - Config files: search_by_type_and_name('file', 'config')
1575
+ - Forward methods: search_by_type_and_name('method', 'forward')
1576
+ - Test files: search_by_type_and_name('file', 'test_')
1577
+
1578
+ COMPARISON WITH SIMILAR TOOLS:
1579
+ - search_nodes: Full-text search in code content (doesn't filter by type)
1580
+ - list_nodes_by_type: Lists all of a type (no name filter)
1581
+ - search_by_type_and_name: Combines type filter + name search (best of both)
1582
 
1583
  Args:
1584
+ node_type: Type to filter by: 'class', 'function', 'method', 'file', 'chunk', 'directory', etc.
1585
+ name_query: Name pattern to search for (case-insensitive). Can be partial.
1586
+ limit: Results per page (default: 10)
1587
+ page: Page number for pagination (default: 1)
1588
+ partial_allowed: Enable fuzzy matching (default: True). Set False for stricter matching.
1589
 
1590
  Returns:
1591
+ str: Matching nodes sorted by relevance, with IDs and types. Use IDs with get_node_info for details.
1592
  """
1593
  if knowledge_graph is None:
1594
  return "Error: Knowledge graph not initialized"
 
1716
  @observe(as_type="tool")
1717
  def get_chunk_context(node_id: str) -> str:
1718
  """
1719
+ Get expanded code context by retrieving a chunk along with its previous and next chunks.
1720
+
1721
+ PURPOSE:
1722
+ Use this tool when you need to see MORE CODE CONTEXT around a specific chunk.
1723
+ Chunks are logical code segments, but sometimes you need to see surrounding code
1724
+ to fully understand the implementation.
1725
+
1726
+ WHEN TO USE:
1727
+ - After search_nodes or get_node_info when you need more surrounding context
1728
+ - When a chunk shows a partial function/class and you need the complete picture
1729
+ - To understand code flow across chunk boundaries
1730
+ - To see imports or setup code that precedes a chunk
1731
+ - To see what code follows after a chunk
1732
+
1733
+ WHAT YOU'LL GET:
1734
+ - The previous chunk's content (if it exists)
1735
+ - The target chunk's content
1736
+ - The next chunk's content (if it exists)
1737
+ - All organized by file and joined together seamlessly
1738
+
1739
+ CONTEXT EXPANSION:
1740
+ - Shows up to 3 consecutive chunks (prev + current + next)
1741
+ - Useful for understanding function bodies that span chunks
1742
+ - Helps see class context when looking at individual methods
1743
+
1744
+ TYPICAL WORKFLOW:
1745
+ 1. search_nodes("attention forward") -> find relevant chunk
1746
+ 2. get_node_info(chunk_id) -> see chunk content
1747
+ 3. get_chunk_context(chunk_id) -> see surrounding code for fuller understanding
1748
+
1749
+ COMPARISON WITH get_node_info:
1750
+ - get_node_info: Single chunk content + full metadata
1751
+ - get_chunk_context: Expanded code view (prev + current + next chunks), less metadata
1752
 
1753
  Args:
1754
+ node_id: The chunk ID to get context for (e.g., 'src/transformers/models/bert/modeling_bert.py::chunk_5')
1755
 
1756
  Returns:
1757
+ str: Combined content of previous, current, and next chunks organized by file. Provides seamless code view.
1758
  """
1759
 
1760
 
 
1790
  @observe(as_type="tool")
1791
  def get_file_stats(path: str) -> str:
1792
  """
1793
+ Get detailed statistics and metrics for a specific file or directory.
1794
+
1795
+ PURPOSE:
1796
+ Use this tool to get quantitative metrics about a file including line counts,
1797
+ entity counts, and chunk counts. Useful for understanding file complexity.
1798
 
1799
+ WHEN TO USE:
1800
+ - To assess the size and complexity of a file
1801
+ - To see summary counts of entities declared and called
1802
+ - To understand how a file is chunked
1803
+ - For code metrics and analysis tasks
1804
+ - When deciding which files to explore further
1805
+
1806
+ METRICS PROVIDED:
1807
+ - Line count (total lines in the file)
1808
+ - Declared entities count with a sample list
1809
+ - Called entities count with a sample list
1810
+ - Number of chunks the file is divided into
1811
+
1812
+ COMPARISON WITH get_file_structure:
1813
+ - get_file_stats: Quantitative metrics (counts, numbers)
1814
+ - get_file_structure: Qualitative overview (entity names, chunk IDs)
1815
+
1816
+ TYPICAL USAGE:
1817
+ - get_file_stats('src/transformers/models/bert/modeling_bert.py') -> see metrics
1818
+ - Use this to identify large/complex files before diving in
1819
 
1820
  Args:
1821
+ path: The file path to analyze. Must match the path as stored in the knowledge graph.
1822
 
1823
  Returns:
1824
+ str: Statistics including line count, declared entities, called entities, and chunk count
1825
  """
1826
  if knowledge_graph is None:
1827
  return "Error: Knowledge graph not initialized"
 
1874
  @observe(as_type="tool")
1875
  def find_path(source_id: str, target_id: str, max_depth: int = 5) -> str:
1876
  """
1877
+ Find the shortest path between two nodes in the knowledge graph.
1878
+
1879
+ PURPOSE:
1880
+ Use this tool to discover how two code elements are connected through the graph.
1881
+ Reveals the chain of relationships linking two seemingly unrelated pieces of code.
1882
+
1883
+ WHEN TO USE:
1884
+ - To understand how two classes/functions are related
1885
+ - To trace dependency chains between components
1886
+ - To discover indirect connections between code elements
1887
+ - To verify if two nodes are connected at all
1888
+ - For understanding code architecture and coupling
1889
+
1890
+ WHAT YOU'LL GET:
1891
+ - Path length (number of hops)
1892
+ - Ordered list of nodes from source to target
1893
+ - Visual representation of the path
1894
+
1895
+ LIMITATIONS:
1896
+ - max_depth limits search to avoid long computations
1897
+ - If no path is found within max_depth, the nodes may still be connected via a longer path
1898
+ - Very distant nodes may require increasing max_depth
1899
+
1900
+ EXAMPLE QUERIES:
1901
+ - How is BertModel connected to GPT2Model?
1902
+ - What's the path from a utility function to a model class?
1903
+ - How many hops between two files?
1904
+
1905
+ TYPICAL WORKFLOW:
1906
+ 1. Identify two node IDs of interest
1907
+ 2. find_path(source, target) -> discover connection
1908
+ 3. get_node_info for nodes in the path to understand the relationship
1909
 
1910
  Args:
1911
+ source_id: Starting node ID (any node type)
1912
+ target_id: Destination node ID (any node type)
1913
+ max_depth: Maximum path length to search (default: 5). Increase for distant nodes.
1914
 
1915
  Returns:
1916
+ str: Path from source to target showing each node in sequence, or message if no path found
1917
  """
1918
  if knowledge_graph is None:
1919
  return "Error: Knowledge graph not initialized"
 
1952
  @observe(as_type="tool")
1953
  def get_subgraph(node_id: str, depth: int = 2, edge_types: Optional[str] = None) -> str:
1954
  """
1955
+ Extract a local subgraph around a node up to a specified depth.
1956
+
1957
+ PURPOSE:
1958
+ Use this tool to get a bounded view of the graph neighborhood around any node.
1959
+ Shows all nodes reachable within a certain number of hops, optionally filtered by edge type.
1960
+
1961
+ WHEN TO USE:
1962
+ - To understand the local network around a class or function
1963
+ - To extract a bounded region of the knowledge graph for analysis
1964
+ - To see all nodes within N hops of a target node
1965
+ - To analyze the dependency neighborhood of a component
1966
+ - When get_neighbors isn't enough and you need multi-hop exploration
1967
+
1968
+ DEPTH EXPLANATION:
1969
+ - depth=1: Only immediate neighbors (same as get_neighbors)
1970
+ - depth=2: Neighbors and their neighbors (2 hops)
1971
+ - depth=3+: Larger neighborhood (exponentially more nodes)
1972
+
1973
+ EDGE TYPE FILTERING:
1974
+ - Pass comma-separated edge types to filter: 'calls,declares'
1975
+ - Common types: 'calls', 'contains', 'declares'
1976
+ - Leave empty or None for all edge types
1977
+
1978
+ OUTPUT:
1979
+ - Node count and edge count in the subgraph
1980
+ - List of all node IDs in the extracted subgraph
1981
+ - Filtered by edge types if specified
1982
+
1983
+ TYPICAL WORKFLOW:
1984
+ 1. Find a central node of interest
1985
+ 2. get_subgraph(node_id, depth=2) -> see local neighborhood
1986
+ 3. Use node IDs from result with get_node_info for details
1987
+
1988
+ COMPARISON WITH get_neighbors:
1989
+ - get_neighbors: Single hop, shows node details
1990
+ - get_subgraph: Multi-hop, shows subgraph structure and counts
1991
 
1992
  Args:
1993
+ node_id: Central node to build subgraph around
1994
+ depth: Radius in hops from central node (default: 2). Higher = larger subgraph.
1995
+ edge_types: Optional comma-separated filter: 'calls,contains,declares' or None for all
1996
 
1997
  Returns:
1998
+ str: Subgraph summary with node/edge counts and list of included node IDs
1999
  """
2000
  if knowledge_graph is None:
2001
  return "Error: Knowledge graph not initialized"
 
2037
  @observe(as_type="tool")
2038
  def list_files_in_directory(directory_path: str = "", pattern: str = "*", recursive: bool = True, limit: int = 50, page: int = 1) -> str:
2039
  """
2040
+ Browse and list files in the repository with flexible filtering options.
2041
+
2042
+ PURPOSE:
2043
+ Use this tool to explore the file structure of the Transformers library.
2044
+ Supports directory scoping, glob patterns, and recursive/non-recursive modes.
2045
+
2046
+ WHEN TO USE:
2047
+ - To see what files exist in a directory
2048
+ - To find files by pattern (e.g., all Python files, all test files)
2049
+ - To explore the repository structure directory by directory
2050
+ - To find specific file types in specific locations
2051
+ - When you need file paths for use with other tools
2052
+
2053
+ FILTERING OPTIONS:
2054
+
2055
+ directory_path:
2056
+ - Empty string '': Search all files in the repository
2057
+ - 'src/transformers/models': Only files under this directory
2058
+ - 'src/transformers/models/bert': Focus on a specific model
2059
+
2060
+ pattern (glob patterns):
2061
+ - '*': All files (default)
2062
+ - '*.py': Python files only
2063
+ - 'test_*.py': Test files
2064
+ - '*config*': Files with 'config' in name
2065
+ - 'modeling_*.py': Modeling files
2066
+
2067
+ recursive:
2068
+ - True (default): Include files in subdirectories
2069
+ - False: Only files directly in the specified directory
2070
+
2071
+ COMMON USE CASES:
2072
+ - All files: list_files_in_directory()
2073
+ - Bert model files: list_files_in_directory('src/transformers/models/bert')
2074
+ - All Python files: list_files_in_directory(pattern='*.py')
2075
+ - Test files only: list_files_in_directory(pattern='test_*.py')
2076
+ - Config files: list_files_in_directory(pattern='*config*')
2077
+
2078
+ COMPARISON WITH print_tree:
2079
+ - print_tree: Visual hierarchy, includes directories
2080
+ - list_files_in_directory: Flat file list with details, better for finding specific files
2081
 
2082
  Args:
2083
+ directory_path: Directory to search in. Empty string for entire repository.
2084
+ pattern: Glob pattern for filename filtering (default: '*' matches all)
2085
+ recursive: Search subdirectories (default: True)
2086
+ limit: Files per page (default: 50)
2087
+ page: Page number for pagination (default: 1)
2088
 
2089
  Returns:
2090
+ str: List of matching files with paths, languages, and entity counts
2091
  """
2092
  if knowledge_graph is None:
2093
  return "Error: Knowledge graph not initialized"
 
2202
  @observe(as_type="tool")
2203
  def find_files_importing(module_or_entity: str, limit: int = 30, page: int = 1) -> str:
2204
  """
2205
+ Find all files that import or use a specific module, class, or function.
2206
+
2207
+ PURPOSE:
2208
+ Use this tool to trace import dependencies and understand which parts of the
2209
+ codebase depend on a particular module or entity.
2210
+
2211
+ WHEN TO USE:
2212
+ - To find all files that import a specific module (e.g., 'torch', 'numpy')
2213
+ - To trace dependencies on a class or function
2214
+ - To understand the impact scope of a module
2215
+ - To find usage patterns of external libraries
2216
+ - For dependency analysis and impact assessment
2217
+
2218
+ SEARCH BEHAVIOR:
2219
+ - Searches through 'called_entities' metadata
2220
+ - Also scans code chunks for import statement patterns
2221
+ - Matches import, from...import, require, use patterns
2222
+ - Case-insensitive matching
2223
+
2224
+ WHAT YOU'LL GET:
2225
+ - List of files that import/use the specified module or entity
2226
+ - Match type (called_entity or import_statement)
2227
+ - Matched entity names when applicable
2228
+
2229
+ EXAMPLE QUERIES:
2230
+ - find_files_importing('torch') -> files using PyTorch
2231
+ - find_files_importing('numpy') -> files using NumPy
2232
+ - find_files_importing('BertModel') -> files using BertModel
2233
+ - find_files_importing('attention') -> files related to attention
2234
+
2235
+ LIMITATIONS:
2236
+ - May not catch all dynamic imports
2237
+ - Pattern matching may have false positives/negatives
2238
+ - For a comprehensive search, combine with search_nodes
2239
 
2240
  Args:
2241
+ module_or_entity: Name of the module, class, or function to search for (case-insensitive)
2242
+ limit: Maximum results per page (default: 30)
2243
+ page: Page number for pagination
2244
 
2245
  Returns:
2246
+ str: List of files that import or use the specified module/entity, with match details
2247
  """
2248
  if knowledge_graph is None:
2249
  return "Error: Knowledge graph not initialized"
 
2361
  @observe(as_type="tool")
2362
  def get_concept_overview(concept: str, limit: int = 15) -> str:
2363
  """
2364
+ Get a high-level overview of how a concept is implemented across the Transformers codebase.
2365
+
2366
+ PURPOSE:
2367
+ Use this tool for broad exploration of a concept or feature. Aggregates related
2368
+ classes, functions, files, and code snippets into a single comprehensive view.
2369
+ Ideal for initial investigation of a topic.
2370
+
2371
+ WHEN TO USE:
2372
+ - FIRST STEP when exploring a new concept (before detailed searches)
2373
+ - To understand how a feature is implemented across the codebase
2374
+ - To discover all components related to a concept
2375
+ - To get a bird's-eye view before diving into specifics
2376
+ - When you're not sure where to start investigating
2377
+
2378
+ SEARCH STRATEGY:
2379
+ This tool combines multiple search approaches:
2380
+ - Searches entity names (classes, functions, methods) containing the concept
2381
+ - Searches file names and paths
2382
+ - Searches chunk content and descriptions
2383
+ - Aggregates results into categorized sections
2384
+
2385
+ CONCEPT EXAMPLES:
2386
+ - 'attention' -> attention mechanisms across all models
2387
+ - 'embedding' -> embedding layers and utilities
2388
+ - 'tokenizer' -> tokenization components
2389
+ - 'generation' -> text generation utilities
2390
+ - 'config' -> configuration classes
2391
+ - 'cache' -> caching mechanisms
2392
+ - 'rope' -> rotary position embeddings
2393
+ - 'flash' -> flash attention implementations
2394
+
2395
+ OUTPUT STRUCTURE:
2396
+ - Related Classes: Class definitions matching the concept
2397
+ - Related Functions/Methods: Functions matching the concept
2398
+ - Related Files: Files with concept in path/name
2399
+ - Code Snippets: Relevant code chunks
2400
+
2401
+ TYPICAL WORKFLOW:
2402
+ 1. get_concept_overview('attention') -> see all attention-related components
2403
+ 2. Identify specific classes/functions of interest
2404
+ 3. go_to_definition or search_nodes for detailed exploration
2405
 
2406
  Args:
2407
+ concept: The concept to explore (e.g., 'attention', 'embedding', 'generation', 'tokenizer')
2408
+ limit: Maximum items per category (default: 15)
2409
 
2410
  Returns:
2411
+ str: Categorized overview with related classes, functions, files, and code snippets
2412
  """
2413
  if knowledge_graph is None:
2414
  return "Error: Knowledge graph not initialized"
 
2581
  with gr.Column():
2582
  node_output = gr.Textbox(label="Node Information", lines=20, max_lines=30)
2583
  node_info_btn.click(fn=get_node_info, inputs=node_id_input, outputs=node_output)
 
2584
  gr.Markdown("#Get Node Info:" + _tool_doc_md(get_node_info))
2585
+ node_edges_btn.click(fn=get_node_edges, inputs=node_id_input, outputs=node_output)
2586
  gr.Markdown("#Get Node Edges:" + _tool_doc_md(get_node_edges))
2587
 
2588
  with gr.Tab("🏗️ Structure"):