lailaelkoussy committed on
Commit
7e00a52
·
1 Parent(s): 3ec78dd

debug script name

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. gradio_mcp_space.py +1482 -159
Dockerfile CHANGED
@@ -44,4 +44,4 @@ EXPOSE 7860
44
  # Default command - can be overridden to use --hf-dataset instead of --graph-file
45
  # To use HuggingFace dataset: --hf-dataset "username/dataset-name"
46
  # To use local file: --graph-file "/app/data/multihop_knowledge_graph_with_embeddings.json"
47
- CMD ["python", "-u", "gradio_mcp.py", "--host", "0.0.0.0", "--port", "7860", "--graph-file", "/app/data/multihop_knowledge_graph_with_embeddings.json"]
 
44
  # Default command - can be overridden to use --hf-dataset instead of --graph-file
45
  # To use HuggingFace dataset: --hf-dataset "username/dataset-name"
46
  # To use local file: --graph-file "/app/data/multihop_knowledge_graph_with_embeddings.json"
47
+ CMD ["python", "-u", "gradio_mcp_space.py"]
gradio_mcp_space.py CHANGED
@@ -61,10 +61,20 @@ def initialize_knowledge_graph(
61
 
62
 
63
  # ==================== Tool Functions ====================
64
-
65
  @observe(as_type="tool")
66
  def get_node_info(node_id: str) -> str:
67
- """Get detailed information about a node in the knowledge graph."""
 
 
 
 
 
 
 
 
 
 
 
68
  if knowledge_graph is None:
69
  return "Error: Knowledge graph not initialized"
70
 
@@ -90,22 +100,48 @@ def get_node_info(node_id: str) -> str:
90
 
91
  result += f"\nEntity Type: {entity_type}\n"
92
  result += f"Aliases: {', '.join(aliases) if aliases else 'None'}\n"
93
- result += f"Declared in {len(declaring_chunk_ids)} chunk(s), Called in {len(calling_chunk_ids)} chunk(s)\n"
 
 
 
 
 
 
 
 
 
 
94
  else:
95
  declared_entities = getattr(node, 'declared_entities', [])
96
  called_entities = getattr(node, 'called_entities', [])
97
- content = getattr(node, 'content', None)
98
-
99
- if declared_entities:
100
- result += f"\nDeclared Entities ({len(declared_entities)}):\n"
101
- for ent in declared_entities[:10]:
102
- result += f" - {ent}\n"
103
- if called_entities:
104
- result += f"\nCalled Entities ({len(called_entities)}):\n"
105
- for ent in called_entities[:10]:
106
- result += f" - {ent}\n"
107
- if content:
108
- result += f"\nContent Preview:\n{content[:500]}{'...' if len(content) > 500 else ''}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  return result
111
  except Exception as e:
@@ -114,7 +150,17 @@ def get_node_info(node_id: str) -> str:
114
 
115
  @observe(as_type="tool")
116
  def get_node_edges(node_id: str) -> str:
117
- """List all incoming and outgoing edges for a node."""
 
 
 
 
 
 
 
 
 
 
118
  if knowledge_graph is None:
119
  return "Error: Knowledge graph not initialized"
120
 
@@ -123,16 +169,31 @@ def get_node_edges(node_id: str) -> str:
123
  return f"Error: Node '{node_id}' not found in knowledge graph"
124
 
125
  g = knowledge_graph.graph
126
- incoming = [(src, data.get("relation", "?")) for src, _, data in g.in_edges(node_id, data=True)]
127
- outgoing = [(tgt, data.get("relation", "?")) for _, tgt, data in g.out_edges(node_id, data=True)]
128
 
129
- result = f"Node Edges for '{node_id}':\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
130
- result += f"\nIncoming Edges ({len(incoming)}):\n"
131
- for src, rel in incoming[:20]:
132
- result += f" {src} --[{rel}]--> {node_id}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  result += f"\nOutgoing Edges ({len(outgoing)}):\n"
134
- for tgt, rel in outgoing[:20]:
135
- result += f" {node_id} --[{rel}]--> {tgt}\n"
 
 
136
 
137
  return result
138
  except Exception as e:
@@ -141,28 +202,81 @@ def get_node_edges(node_id: str) -> str:
141
 
142
  @observe(as_type="tool")
143
  def search_nodes(query: str, limit: int = 10) -> str:
144
- """Search for nodes in the knowledge graph by query string."""
 
 
 
 
 
 
 
 
 
 
 
145
  if knowledge_graph is None:
146
  return "Error: Knowledge graph not initialized"
147
 
148
  try:
149
- limit = int(limit) if isinstance(limit, str) else limit
150
- limit = max(1, min(limit, 50))
 
 
 
 
 
 
 
151
 
152
  results = knowledge_graph.code_index.query(query, n_results=limit)
153
  metadatas = results.get("metadatas", [[]])[0]
154
 
155
  if not metadatas:
156
- return f"No results found for '{query}'"
157
 
158
  result = f"Search Results for '{query}' ({len(metadatas)} results):\n"
159
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
160
 
161
  for i, res in enumerate(metadatas, 1):
162
- node_id = res.get("node_id", "Unknown")
163
- node_type = res.get("node_type", "Unknown")
164
- name = res.get("name", "Unknown")
165
- result += f"{i}. [{node_type}] {name}\n ID: {node_id}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  return result
168
  except Exception as e:
@@ -171,7 +285,14 @@ def search_nodes(query: str, limit: int = 10) -> str:
171
 
172
  @observe(as_type="tool")
173
  def get_graph_stats() -> str:
174
- """Get overall statistics about the knowledge graph."""
 
 
 
 
 
 
 
175
  if knowledge_graph is None:
176
  return "Error: Knowledge graph not initialized"
177
 
@@ -182,21 +303,28 @@ def get_graph_stats() -> str:
182
 
183
  node_types = {}
184
  for _, node_attrs in g.nodes(data=True):
185
- ntype = getattr(node_attrs.get('data'), 'node_type', 'unknown')
186
- node_types[ntype] = node_types.get(ntype, 0) + 1
187
 
188
  edge_relations = {}
189
  for _, _, attrs in g.edges(data=True):
190
- relation = attrs.get("relation", "unknown")
191
  edge_relations[relation] = edge_relations.get(relation, 0) + 1
192
 
193
- result = f"Knowledge Graph Statistics:\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
194
- result += f"Total Nodes: {num_nodes}\nTotal Edges: {num_edges}\n\nNode Types:\n"
 
 
 
 
 
 
195
  for ntype, count in sorted(node_types.items(), key=lambda x: x[1], reverse=True):
196
- result += f" {ntype}: {count}\n"
 
197
  result += "\nEdge Relations:\n"
198
  for relation, count in sorted(edge_relations.items(), key=lambda x: x[1], reverse=True):
199
- result += f" {relation}: {count}\n"
200
 
201
  return result
202
  except Exception as e:
@@ -205,26 +333,46 @@ def get_graph_stats() -> str:
205
 
206
  @observe(as_type="tool")
207
  def list_nodes_by_type(node_type: str, limit: int = 20) -> str:
208
- """List nodes of a specific type in the knowledge graph."""
 
 
 
 
 
 
 
 
 
209
  if knowledge_graph is None:
210
  return "Error: Knowledge graph not initialized"
211
 
212
  try:
213
- limit = int(limit) if isinstance(limit, str) else limit
 
 
 
 
 
 
214
  g = knowledge_graph.graph
215
  matching_nodes = [
216
- {"id": node_id, "name": getattr(data['data'], 'name', 'Unknown')}
 
 
 
217
  for node_id, data in g.nodes(data=True)
218
  if getattr(data['data'], 'node_type', None) == node_type
219
  ][:limit]
220
 
221
  if not matching_nodes:
222
- return f"No nodes found of type '{node_type}'"
223
 
224
  result = f"Nodes of type '{node_type}' ({len(matching_nodes)} results):\n"
225
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
 
226
  for i, node in enumerate(matching_nodes, 1):
227
- result += f"{i}. {node['name']}\n ID: {node['id']}\n\n"
 
228
 
229
  return result
230
  except Exception as e:
@@ -233,7 +381,17 @@ def list_nodes_by_type(node_type: str, limit: int = 20) -> str:
233
 
234
  @observe(as_type="tool")
235
  def get_neighbors(node_id: str) -> str:
236
- """Get all nodes directly connected to a given node."""
 
 
 
 
 
 
 
 
 
 
237
  if knowledge_graph is None:
238
  return "Error: Knowledge graph not initialized"
239
 
@@ -243,16 +401,26 @@ def get_neighbors(node_id: str) -> str:
243
 
244
  neighbors = knowledge_graph.get_neighbors(node_id)
245
  if not neighbors:
246
- return f"No neighbors found for '{node_id}'"
247
 
248
  result = f"Neighbors of '{node_id}' ({len(neighbors)} total):\n"
249
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
250
 
251
  for i, neighbor in enumerate(neighbors[:20], 1):
252
- n_id = neighbor.get('id', 'Unknown')
253
- n_type = neighbor.get('node_type', 'Unknown')
254
- n_name = neighbor.get('name', 'Unknown')
255
- result += f"{i}. [{n_type}] {n_name}\n ID: {n_id}\n\n"
 
 
 
 
 
 
 
 
 
 
256
 
257
  return result
258
  except Exception as e:
@@ -261,27 +429,47 @@ def get_neighbors(node_id: str) -> str:
261
 
262
  @observe(as_type="tool")
263
  def go_to_definition(entity_name: str) -> str:
264
- """Find where an entity is declared or defined in the codebase."""
 
 
 
 
 
 
 
 
 
 
265
  if knowledge_graph is None:
266
  return "Error: Knowledge graph not initialized"
267
 
268
  try:
269
  if entity_name not in knowledge_graph.entities:
270
- return f"Entity '{entity_name}' not found in knowledge graph"
271
 
272
  entity_info = knowledge_graph.entities[entity_name]
273
  declaring_chunks = entity_info.get('declaring_chunk_ids', [])
274
 
275
  if not declaring_chunks:
276
- return f"No declaration found for '{entity_name}'"
277
 
278
  result = f"Definition(s) for '{entity_name}':\n"
279
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
280
  result += f"Type: {', '.join(entity_info.get('type', ['Unknown']))}\n"
 
 
281
  result += f"\nDeclared in {len(declaring_chunks)} location(s):\n\n"
282
 
283
  for i, chunk_id in enumerate(declaring_chunks[:5], 1):
284
- result += f"{i}. {chunk_id}\n"
 
 
 
 
 
 
 
 
285
 
286
  return result
287
  except Exception as e:
@@ -290,26 +478,52 @@ def go_to_definition(entity_name: str) -> str:
290
 
291
  @observe(as_type="tool")
292
  def find_usages(entity_name: str, limit: int = 20) -> str:
293
- """Find all usages or calls of an entity in the codebase."""
 
 
 
 
 
 
 
 
 
 
 
294
  if knowledge_graph is None:
295
  return "Error: Knowledge graph not initialized"
296
 
297
  try:
298
- limit = int(limit) if isinstance(limit, str) else limit
 
 
 
 
 
 
299
  if entity_name not in knowledge_graph.entities:
300
- return f"Entity '{entity_name}' not found in knowledge graph"
 
 
 
301
 
302
  entity_info = knowledge_graph.entities[entity_name]
303
  calling_chunks = entity_info.get('calling_chunk_ids', [])
304
 
305
  if not calling_chunks:
306
- return f"No usages found for '{entity_name}'"
307
 
308
  result = f"Usages of '{entity_name}' ({len(calling_chunks)} total):\n"
309
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
310
 
311
  for i, chunk_id in enumerate(calling_chunks[:limit], 1):
312
- result += f"{i}. {chunk_id}\n"
 
 
 
 
 
 
313
 
314
  return result
315
  except Exception as e:
@@ -318,7 +532,17 @@ def find_usages(entity_name: str, limit: int = 20) -> str:
318
 
319
  @observe(as_type="tool")
320
  def get_file_structure(file_path: str) -> str:
321
- """Get an overview of the structure of a file."""
 
 
 
 
 
 
 
 
 
 
322
  if knowledge_graph is None:
323
  return "Error: Knowledge graph not initialized"
324
 
@@ -337,13 +561,23 @@ def get_file_structure(file_path: str) -> str:
337
 
338
  if hasattr(file_node, 'declared_entities') and file_node.declared_entities:
339
  result += f"Declared Entities ({len(file_node.declared_entities)}):\n"
340
- for ent in file_node.declared_entities[:10]:
341
- result += f" - {ent}\n"
 
 
 
 
 
342
 
343
  result += f"\nChunks:\n"
344
  for chunk in chunks[:10]:
345
- chunk_name = getattr(chunk, 'name', 'Unknown')
346
- result += f" - {chunk_name}\n"
 
 
 
 
 
347
 
348
  return result
349
  except Exception as e:
@@ -351,105 +585,569 @@ def get_file_structure(file_path: str) -> str:
351
 
352
 
353
  @observe(as_type="tool")
354
- def get_chunk_context(node_id: str) -> str:
355
- """Get the full content of a code chunk along with its surrounding chunks."""
356
- from pedagogia_graph_code_repo.RepoKnowledgeGraphLib.utils.chunk_utils import (
357
- organize_chunks_by_file_name, join_organized_chunks
358
- )
359
 
 
 
 
 
 
 
 
360
  if knowledge_graph is None:
361
  return "Error: Knowledge graph not initialized"
362
 
363
  try:
364
- if node_id not in knowledge_graph.graph:
365
- return f"Error: Node '{node_id}' not found in knowledge graph"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
- context = knowledge_graph.get_chunk_context(node_id)
368
- if not context:
369
- return f"No context found for node '{node_id}'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
- organized = organize_chunks_by_file_name(context)
372
- joined_content = join_organized_chunks(organized)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
 
374
- result = f"Context for '{node_id}':\n"
375
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
376
- result += joined_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
 
378
  return result
379
  except Exception as e:
380
  return f"Error: {str(e)}"
381
 
382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  @observe(as_type="tool")
384
  def print_tree(root_id: str = "root", max_depth: int = 3) -> str:
385
- """Show a tree view of the repository structure."""
 
 
 
 
 
 
 
 
 
 
 
386
  if knowledge_graph is None:
387
  return "Error: Knowledge graph not initialized"
388
 
389
  try:
390
- max_depth = int(max_depth) if isinstance(max_depth, str) else max_depth
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
- def build_tree(node_id, depth=0, prefix=""):
 
 
 
393
  if depth > max_depth:
394
  return ""
395
 
396
- node = knowledge_graph.graph.nodes.get(node_id, {}).get('data')
397
- if not node:
398
- return ""
399
 
400
- node_name = getattr(node, 'name', node_id)
401
- node_type = getattr(node, 'node_type', 'unknown')
402
- result = f"{prefix}{node_name} [{node_type}]\n"
403
 
404
- children = [tgt for _, tgt, data in knowledge_graph.graph.out_edges(node_id, data=True)
405
- if data.get('relation') == 'contains']
 
406
 
407
- for i, child in enumerate(children[:20]):
408
- is_last = i == len(children[:20]) - 1
409
- child_prefix = prefix + ("└── " if is_last else "├── ")
410
- next_prefix = prefix + (" " if is_last else "│ ")
411
- result += build_tree(child, depth + 1, child_prefix).replace(child_prefix, child_prefix, 1)
412
- if result.count('\n') > 1:
413
- result = result.replace(next_prefix[:-4], next_prefix[:-4], 1)
414
 
415
- return result
416
 
417
- return build_tree(root_id)
 
418
  except Exception as e:
419
  return f"Error: {str(e)}"
420
 
421
 
422
  @observe(as_type="tool")
423
- def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10) -> str:
424
- """Search for nodes/entities by type and name substring."""
 
 
 
 
 
 
 
 
 
 
425
  if knowledge_graph is None:
426
  return "Error: Knowledge graph not initialized"
427
 
428
  try:
429
- limit = int(limit) if isinstance(limit, str) else limit
430
- name_query_lower = name_query.lower()
 
431
  g = knowledge_graph.graph
432
 
433
- matching = []
434
- for node_id, data in g.nodes(data=True):
435
- node = data.get('data')
436
- if not node:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  continue
438
- n_type = getattr(node, 'node_type', None) or getattr(node, 'entity_type', None)
439
- n_name = getattr(node, 'name', '')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
 
441
- if n_type == node_type and name_query_lower in n_name.lower():
442
- matching.append({"id": node_id, "name": n_name, "type": n_type})
443
- if len(matching) >= limit:
444
- break
 
 
 
 
 
445
 
446
- if not matching:
447
- return f"No nodes found matching type '{node_type}' and name containing '{name_query}'"
 
 
 
448
 
449
- result = f"Search Results ({len(matching)} matches):\n"
 
 
 
 
 
 
 
450
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
451
- for i, m in enumerate(matching, 1):
452
- result += f"{i}. [{m['type']}] {m['name']}\n ID: {m['id']}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
 
454
  return result
455
  except Exception as e:
@@ -457,30 +1155,48 @@ def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10) ->
457
 
458
 
459
  @observe(as_type="tool")
460
- def find_classes_inheriting_from(base_class_name: str, limit: int = 20) -> str:
461
- """Find all classes that inherit from a given base class."""
 
 
 
 
 
 
 
 
 
 
 
 
462
  if knowledge_graph is None:
463
  return "Error: Knowledge graph not initialized"
464
 
465
  try:
466
- limit = int(limit) if isinstance(limit, str) else limit
467
- matching = []
 
 
 
 
 
 
468
 
469
- for entity_name, info in knowledge_graph.entities.items():
470
- if 'class' in info.get('type', []):
471
- bases = info.get('bases', [])
472
- if base_class_name in bases or any(base_class_name in b for b in bases):
473
- matching.append({"name": entity_name, "bases": bases})
474
- if len(matching) >= limit:
475
- break
476
 
477
- if not matching:
478
- return f"No classes found inheriting from '{base_class_name}'"
479
 
480
- result = f"Classes inheriting from '{base_class_name}' ({len(matching)} found):\n"
481
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
482
- for i, m in enumerate(matching, 1):
483
- result += f"{i}. {m['name']}\n Bases: {', '.join(m['bases'])}\n\n"
 
 
 
 
 
484
 
485
  return result
486
  except Exception as e:
@@ -488,37 +1204,501 @@ def find_classes_inheriting_from(base_class_name: str, limit: int = 20) -> str:
488
 
489
 
490
  @observe(as_type="tool")
491
- def get_concept_overview(concept: str, limit: int = 15) -> str:
492
- """Get a high-level overview of a concept across the codebase."""
 
 
 
 
 
 
 
 
 
 
 
 
493
  if knowledge_graph is None:
494
  return "Error: Knowledge graph not initialized"
495
 
496
  try:
497
- limit = int(limit) if isinstance(limit, str) else limit
498
- concept_lower = concept.lower()
 
 
 
 
 
 
 
499
 
500
- # Search for matching entities
501
- matching_entities = []
502
- for entity_name, info in knowledge_graph.entities.items():
503
- if concept_lower in entity_name.lower():
504
- matching_entities.append({"name": entity_name, "type": info.get('type', [])})
505
- if len(matching_entities) >= limit:
506
- break
507
 
508
- # Search in code index
509
- search_results = knowledge_graph.code_index.query(concept, n_results=limit)
510
- metadatas = search_results.get("metadatas", [[]])[0]
 
511
 
512
- result = f"Concept Overview: '{concept}'\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
514
 
515
- result += f"Matching Entities ({len(matching_entities)}):\n"
516
- for ent in matching_entities[:10]:
517
- result += f" - {ent['name']} ({', '.join(ent['type'])})\n"
 
 
 
 
 
 
 
518
 
519
- result += f"\nRelevant Code Chunks ({len(metadatas)}):\n"
520
- for meta in metadatas[:10]:
521
- result += f" - {meta.get('name', 'Unknown')} ({meta.get('node_type', 'Unknown')})\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
 
523
  return result
524
  except Exception as e:
@@ -530,17 +1710,24 @@ def get_concept_overview(concept: str, limit: int = 15) -> str:
530
  def create_gradio_app():
531
  """Create and configure the Gradio interface."""
532
 
533
- with gr.Blocks(title="Knowledge Graph MCP Server", theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
534
  gr.Markdown("""
535
- # 🔍 Knowledge Graph MCP Server
536
-
537
- Explore and query your codebase knowledge graph loaded from HuggingFace.
538
  """)
539
 
540
  with gr.Tab("📊 Graph Overview"):
541
  stats_btn = gr.Button("Get Graph Statistics", variant="primary")
542
  stats_output = gr.Textbox(label="Statistics", lines=20, max_lines=30)
543
  stats_btn.click(fn=get_graph_stats, outputs=stats_output)
 
544
 
545
  with gr.Tab("🔎 Search"):
546
  with gr.Row():
@@ -551,6 +1738,7 @@ def create_gradio_app():
551
  with gr.Column():
552
  search_output = gr.Textbox(label="Search Results", lines=20, max_lines=30)
553
  search_btn.click(fn=search_nodes, inputs=[search_query, search_limit], outputs=search_output)
 
554
 
555
  with gr.Tab("πŸ“ Node Info"):
556
  with gr.Row():
@@ -562,6 +1750,8 @@ def create_gradio_app():
562
  node_output = gr.Textbox(label="Node Information", lines=20, max_lines=30)
563
  node_info_btn.click(fn=get_node_info, inputs=node_id_input, outputs=node_output)
564
  node_edges_btn.click(fn=get_node_edges, inputs=node_id_input, outputs=node_output)
 
 
565
 
566
  with gr.Tab("🏗️ Structure"):
567
  gr.Markdown("### Repository Tree")
@@ -573,6 +1763,7 @@ def create_gradio_app():
573
  with gr.Column():
574
  tree_output = gr.Textbox(label="Tree View", lines=20, max_lines=40)
575
  tree_btn.click(fn=print_tree, inputs=[tree_root, tree_depth], outputs=tree_output)
 
576
 
577
  gr.Markdown("---")
578
  gr.Markdown("### File Structure")
@@ -583,8 +1774,34 @@ def create_gradio_app():
583
  with gr.Column():
584
  file_structure_output = gr.Textbox(label="File Structure", lines=20, max_lines=30)
585
  file_structure_btn.click(fn=get_file_structure, inputs=file_path_input, outputs=file_structure_output)
 
586
 
587
  with gr.Tab("🎯 Entities"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
  gr.Markdown("### Go to Definition")
589
  with gr.Row():
590
  with gr.Column():
@@ -593,6 +1810,7 @@ def create_gradio_app():
593
  with gr.Column():
594
  def_output = gr.Textbox(label="Definition", lines=15, max_lines=25)
595
  def_btn.click(fn=go_to_definition, inputs=entity_name_def, outputs=def_output)
 
596
 
597
  gr.Markdown("---")
598
  gr.Markdown("### Find Usages")
@@ -604,6 +1822,7 @@ def create_gradio_app():
604
  with gr.Column():
605
  usage_output = gr.Textbox(label="Usages", lines=15, max_lines=25)
606
  usage_btn.click(fn=find_usages, inputs=[entity_name_usage, usage_limit], outputs=usage_output)
 
607
 
608
  with gr.Tab("🔬 Discovery"):
609
  gr.Markdown("### List Nodes by Type")
@@ -618,6 +1837,7 @@ def create_gradio_app():
618
  with gr.Column():
619
  type_output = gr.Textbox(label="Results", lines=20, max_lines=30)
620
  type_btn.click(fn=list_nodes_by_type, inputs=[node_type_input, type_limit], outputs=type_output)
 
621
 
622
  gr.Markdown("---")
623
  gr.Markdown("### Search by Type and Name")
@@ -632,6 +1852,7 @@ def create_gradio_app():
632
  with gr.Column():
633
  search_type_output = gr.Textbox(label="Results", lines=20, max_lines=30)
634
  search_type_btn.click(fn=search_by_type_and_name, inputs=[search_type, search_name], outputs=search_type_output)
 
635
 
636
  with gr.Tab("🔗 Relationships"):
637
  gr.Markdown("### Get Neighbors")
@@ -642,6 +1863,43 @@ def create_gradio_app():
642
  with gr.Column():
643
  neighbor_output = gr.Textbox(label="Neighbors", lines=20, max_lines=30)
644
  neighbor_btn.click(fn=get_neighbors, inputs=neighbor_node_id, outputs=neighbor_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
 
646
  gr.Markdown("---")
647
  gr.Markdown("### Find Classes Inheriting From")
@@ -652,6 +1910,7 @@ def create_gradio_app():
652
  with gr.Column():
653
  inherit_output = gr.Textbox(label="Inheriting Classes", lines=20, max_lines=30)
654
  inherit_btn.click(fn=find_classes_inheriting_from, inputs=base_class_input, outputs=inherit_output)
 
655
 
656
  with gr.Tab("📖 Context"):
657
  gr.Markdown("### Get Chunk Context")
@@ -662,6 +1921,7 @@ def create_gradio_app():
662
  with gr.Column():
663
  context_output = gr.Textbox(label="Context", lines=25, max_lines=40)
664
  context_btn.click(fn=get_chunk_context, inputs=chunk_id_input, outputs=context_output)
 
665
 
666
  gr.Markdown("---")
667
  gr.Markdown("### Concept Overview")
@@ -672,6 +1932,69 @@ def create_gradio_app():
672
  with gr.Column():
673
  concept_output = gr.Textbox(label="Concept Overview", lines=25, max_lines=40)
674
  concept_btn.click(fn=get_concept_overview, inputs=concept_input, outputs=concept_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
 
676
  return demo
677
 
 
61
 
62
 
63
  # ==================== Tool Functions ====================
 
64
  @observe(as_type="tool")
65
  def get_node_info(node_id: str) -> str:
66
+ """
67
+ Get detailed information about a node in the knowledge graph.
68
+
69
+ Returns information including the node's type, name, description,
70
+ declared/called entities, and type-specific details.
71
+
72
+ Args:
73
+ node_id: The ID of the node to retrieve information for
74
+
75
+ Returns:
76
+ str: A formatted string with node information
77
+ """
78
  if knowledge_graph is None:
79
  return "Error: Knowledge graph not initialized"
80
 
 
100
 
101
  result += f"\nEntity Type: {entity_type}\n"
102
  result += f"Aliases: {', '.join(aliases) if aliases else 'None'}\n"
103
+ result += f"Declared in {len(declaring_chunk_ids)} chunk(s):\n"
104
+ for cid in declaring_chunk_ids[:5]:
105
+ result += f" - {cid}\n"
106
+ if len(declaring_chunk_ids) > 5:
107
+ result += f" ... and {len(declaring_chunk_ids) - 5} more\n"
108
+ result += f"Called in {len(calling_chunk_ids)} chunk(s):\n"
109
+ for cid in calling_chunk_ids[:5]:
110
+ result += f" - {cid}\n"
111
+ if len(calling_chunk_ids) > 5:
112
+ result += f" ... and {len(calling_chunk_ids) - 5} more\n"
113
+ result += f"\nSummary: Entity {node_id} ({node_name}) — {entity_type} declared in {len(declaring_chunk_ids)} chunk(s) and called in {len(calling_chunk_ids)} chunk(s).\n"
114
  else:
115
  declared_entities = getattr(node, 'declared_entities', [])
116
  called_entities = getattr(node, 'called_entities', [])
117
+
118
+ result += f"\nDeclared Entities ({len(declared_entities)}):\n"
119
+ for entity in declared_entities[:10]:
120
+ result += f" - {entity}\n"
121
+ if len(declared_entities) > 10:
122
+ result += f" ... and {len(declared_entities) - 10} more\n"
123
+
124
+ result += f"\nCalled Entities ({len(called_entities)}):\n"
125
+ for entity in called_entities[:10]:
126
+ result += f" - {entity}\n"
127
+ if len(called_entities) > 10:
128
+ result += f" ... and {len(called_entities) - 10} more\n"
129
+
130
+ # Add content preview for file/chunk nodes
131
+ if node_type in ['file', 'chunk']:
132
+ content = getattr(node, 'content', None)
133
+ result += f"\nContent:\n{content or 'N/A'}\n"
134
+ if hasattr(node, 'path'):
135
+ result += f"Path: {node.path}\n"
136
+ if hasattr(node, 'language'):
137
+ result += f"Language: {node.language}\n"
138
+ if node_type == 'chunk' and hasattr(node, 'order_in_file'):
139
+ result += f"Order in File: {node.order_in_file}\n"
140
+ elif node_type == 'directory':
141
+ if hasattr(node, 'path'):
142
+ result += f"Path: {node.path}\n"
143
+
144
+ result += f"\nSummary: Node {node_id} ({node_name}) — {node_type} with {len(declared_entities)} declared and {len(called_entities)} called entities.\n"
145
 
146
  return result
147
  except Exception as e:
 
150
 
151
  @observe(as_type="tool")
152
  def get_node_edges(node_id: str) -> str:
153
+ """
154
+ List all incoming and outgoing edges for a node.
155
+
156
+ Shows relationships to other nodes in the knowledge graph.
157
+
158
+ Args:
159
+ node_id: The ID of the node whose edges to list
160
+
161
+ Returns:
162
+ str: A formatted string showing all edges
163
+ """
164
  if knowledge_graph is None:
165
  return "Error: Knowledge graph not initialized"
166
 
 
169
  return f"Error: Node '{node_id}' not found in knowledge graph"
170
 
171
  g = knowledge_graph.graph
 
 
172
 
173
+ incoming = [
174
+ {"source": src, "target": tgt, "relation": data.get("relation", "?")}
175
+ for src, tgt, data in g.in_edges(node_id, data=True)
176
+ ]
177
+ outgoing = [
178
+ {"source": src, "target": tgt, "relation": data.get("relation", "?")}
179
+ for src, tgt, data in g.out_edges(node_id, data=True)
180
+ ]
181
+
182
+ result = f"""Node Edges for '{node_id}':
183
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
184
+
185
+ Incoming Edges ({len(incoming)}):
186
+ """
187
+ for edge in incoming[:20]:
188
+ result += f" ← {edge['source']} [{edge['relation']}]\n"
189
+ if len(incoming) > 20:
190
+ result += f" ... and {len(incoming) - 20} more\n"
191
+
192
  result += f"\nOutgoing Edges ({len(outgoing)}):\n"
193
+ for edge in outgoing[:20]:
194
+ result += f" → {edge['target']} [{edge['relation']}]\n"
195
+ if len(outgoing) > 20:
196
+ result += f" ... and {len(outgoing) - 20} more\n"
197
 
198
  return result
199
  except Exception as e:
 
202
 
203
  @observe(as_type="tool")
204
  def search_nodes(query: str, limit: int = 10) -> str:
205
+ """
206
+ Search for nodes in the knowledge graph by query string.
207
+
208
+ Uses semantic and keyword search via the code index.
209
+
210
+ Args:
211
+ query: The search string to match against code index
212
+ limit: Maximum number of results to return (default: 10)
213
+
214
+ Returns:
215
+ str: A formatted string with search results
216
+ """
217
  if knowledge_graph is None:
218
  return "Error: Knowledge graph not initialized"
219
 
220
  try:
221
+ # Convert limit to int if it's a string (MCP may pass strings)
222
+ if isinstance(limit, str):
223
+ try:
224
+ limit = int(limit)
225
+ except ValueError:
226
+ return f"Error: 'limit' must be an integer, got '{limit}'"
227
+
228
+ if limit <= 0:
229
+ return "Error: limit must be a positive integer"
230
 
231
  results = knowledge_graph.code_index.query(query, n_results=limit)
232
  metadatas = results.get("metadatas", [[]])[0]
233
 
234
  if not metadatas:
235
+ return f"No results found for '{query}'."
236
 
237
  result = f"Search Results for '{query}' ({len(metadatas)} results):\n"
238
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
239
 
240
  for i, res in enumerate(metadatas, 1):
241
+ result += f"{i}. ID: {res.get('id', 'N/A')}\n"
242
+ content = res.get('content', '')
243
+ if content:
244
+ result += f" Content: {content}\n"
245
+
246
+ # Handle declared entities - parse JSON if it's a string
247
+ declared = res.get('declared_entities', '')
248
+ if declared and declared != '[]':
249
+ try:
250
+ # Try to parse as JSON if it's a string
251
+ import json
252
+ if isinstance(declared, str):
253
+ declared = json.loads(declared)
254
+ # Extract entity names from the list of dicts
255
+ if isinstance(declared, list) and declared:
256
+ entity_names = [e.get('name', str(e)) if isinstance(e, dict) else str(e) for e in declared[:10]]
257
+ result += f" Declared: {', '.join(entity_names)}\n"
258
+ if len(declared) > 10:
259
+ result += f" ... and {len(declared) - 10} more\n"
260
+ except (json.JSONDecodeError, AttributeError):
261
+ result += f" Declared: {declared}\n"
262
+
263
+ # Handle called entities - parse JSON if it's a string
264
+ called = res.get('called_entities', '')
265
+ if called and called != '[]':
266
+ try:
267
+ # Try to parse as JSON if it's a string
268
+ import json
269
+ if isinstance(called, str):
270
+ called = json.loads(called)
271
+ # Extract entity names from the list of dicts
272
+ if isinstance(called, list) and called:
273
+ entity_names = [e.get('name', str(e)) if isinstance(e, dict) else str(e) for e in called[:10]]
274
+ result += f" Called: {', '.join(entity_names)}\n"
275
+ if len(called) > 10:
276
+ result += f" ... and {len(called) - 10} more\n"
277
+ except (json.JSONDecodeError, AttributeError):
278
+ result += f" Called: {called}\n"
279
+ result += "\n"
280
 
281
  return result
282
  except Exception as e:
 
285
 
286
  @observe(as_type="tool")
287
  def get_graph_stats() -> str:
288
+ """
289
+ Get overall statistics about the knowledge graph.
290
+
291
+ Includes node and edge counts, types, and relations.
292
+
293
+ Returns:
294
+ str: A formatted string with graph statistics
295
+ """
296
  if knowledge_graph is None:
297
  return "Error: Knowledge graph not initialized"
298
 
 
303
 
304
  node_types = {}
305
  for _, node_attrs in g.nodes(data=True):
306
+ node_type = getattr(node_attrs['data'], 'node_type', 'Unknown')
307
+ node_types[node_type] = node_types.get(node_type, 0) + 1
308
 
309
  edge_relations = {}
310
  for _, _, attrs in g.edges(data=True):
311
+ relation = attrs.get('relation', 'Unknown')
312
  edge_relations[relation] = edge_relations.get(relation, 0) + 1
313
 
314
+ result = f"""Knowledge Graph Statistics:
315
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
316
+
317
+ Total Nodes: {num_nodes}
318
+ Total Edges: {num_edges}
319
+
320
+ Node Types:
321
+ """
322
  for ntype, count in sorted(node_types.items(), key=lambda x: x[1], reverse=True):
323
+ result += f" - {ntype}: {count}\n"
324
+
325
  result += "\nEdge Relations:\n"
326
  for relation, count in sorted(edge_relations.items(), key=lambda x: x[1], reverse=True):
327
+ result += f" - {relation}: {count}\n"
328
 
329
  return result
330
  except Exception as e:
 
333
 
334
  @observe(as_type="tool")
335
  def list_nodes_by_type(node_type: str, limit: int = 20) -> str:
336
+ """
337
+ List nodes of a specific type in the knowledge graph.
338
+
339
+ Args:
340
+ node_type: The type of nodes to list (e.g., 'function', 'class', 'file')
341
+ limit: Maximum number of nodes to return (default: 20)
342
+
343
+ Returns:
344
+ str: A formatted string with matching nodes
345
+ """
346
  if knowledge_graph is None:
347
  return "Error: Knowledge graph not initialized"
348
 
349
  try:
350
+ # Convert limit to int if it's a string (MCP may pass strings)
351
+ if isinstance(limit, str):
352
+ try:
353
+ limit = int(limit)
354
+ except ValueError:
355
+ return f"Error: 'limit' must be an integer, got '{limit}'"
356
+
357
  g = knowledge_graph.graph
358
  matching_nodes = [
359
+ {
360
+ "id": node_id,
361
+ "name": getattr(data['data'], 'name', 'Unknown')
362
+ }
363
  for node_id, data in g.nodes(data=True)
364
  if getattr(data['data'], 'node_type', None) == node_type
365
  ][:limit]
366
 
367
  if not matching_nodes:
368
+ return f"No nodes found of type '{node_type}'."
369
 
370
  result = f"Nodes of type '{node_type}' ({len(matching_nodes)} results):\n"
371
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
372
+
373
  for i, node in enumerate(matching_nodes, 1):
374
+ result += f"{i}. {node['name']}\n"
375
+ result += f" ID: {node['id']}\n\n"
376
 
377
  return result
378
  except Exception as e:
 
381
 
382
  @observe(as_type="tool")
383
  def get_neighbors(node_id: str) -> str:
384
+ """
385
+ Get all nodes directly connected to a given node.
386
+
387
+ Shows neighboring nodes with their relationship types.
388
+
389
+ Args:
390
+ node_id: The ID of the node whose neighbors to retrieve
391
+
392
+ Returns:
393
+ str: A formatted string showing all neighbors
394
+ """
395
  if knowledge_graph is None:
396
  return "Error: Knowledge graph not initialized"
397
 
 
401
 
402
  neighbors = knowledge_graph.get_neighbors(node_id)
403
  if not neighbors:
404
+ return f"No neighbors found for node '{node_id}'"
405
 
406
  result = f"Neighbors of '{node_id}' ({len(neighbors)} total):\n"
407
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
408
 
409
  for i, neighbor in enumerate(neighbors[:20], 1):
410
+ result += f"{i}. {neighbor.id}\n"
411
+ result += f" Name: {getattr(neighbor, 'name', 'Unknown')}\n"
412
+ result += f" Type: {neighbor.node_type}\n"
413
+
414
+ if knowledge_graph.graph.has_edge(node_id, neighbor.id):
415
+ edge_data = knowledge_graph.graph.get_edge_data(node_id, neighbor.id)
416
+ result += f" β†’ Relation: {edge_data.get('relation', 'Unknown')}\n"
417
+ elif knowledge_graph.graph.has_edge(neighbor.id, node_id):
418
+ edge_data = knowledge_graph.graph.get_edge_data(neighbor.id, node_id)
419
+ result += f" ← Relation: {edge_data.get('relation', 'Unknown')}\n"
420
+ result += "\n"
421
+
422
+ if len(neighbors) > 20:
423
+ result += f"... and {len(neighbors) - 20} more neighbors\n"
424
 
425
  return result
426
  except Exception as e:
 
429
 
430
  @observe(as_type="tool")
431
  def go_to_definition(entity_name: str) -> str:
432
+ """
433
+ Find where an entity is declared or defined in the codebase.
434
+
435
+ Locates the declaration point for functions, classes, variables, etc.
436
+
437
+ Args:
438
+ entity_name: The name of the entity to find the definition for
439
+
440
+ Returns:
441
+ str: A formatted string with definition locations
442
+ """
443
  if knowledge_graph is None:
444
  return "Error: Knowledge graph not initialized"
445
 
446
  try:
447
  if entity_name not in knowledge_graph.entities:
448
+ return f"Error: Entity '{entity_name}' not found in knowledge graph"
449
 
450
  entity_info = knowledge_graph.entities[entity_name]
451
  declaring_chunks = entity_info.get('declaring_chunk_ids', [])
452
 
453
  if not declaring_chunks:
454
+ return f"Entity '{entity_name}' found but no declarations identified."
455
 
456
  result = f"Definition(s) for '{entity_name}':\n"
457
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
458
  result += f"Type: {', '.join(entity_info.get('type', ['Unknown']))}\n"
459
+ if entity_info.get('dtype'):
460
+ result += f"Data Type: {entity_info['dtype']}\n"
461
  result += f"\nDeclared in {len(declaring_chunks)} location(s):\n\n"
462
 
463
  for i, chunk_id in enumerate(declaring_chunks[:5], 1):
464
+ if chunk_id in knowledge_graph.graph:
465
+ chunk = knowledge_graph.graph.nodes[chunk_id]['data']
466
+ result += f"{i}. Chunk: {chunk_id}\n"
467
+ result += f" File: {chunk.path}\n"
468
+ result += f" Order: {chunk.order_in_file}\n"
469
+ result += f" Content:\n{chunk.content}\n\n"
470
+
471
+ if len(declaring_chunks) > 5:
472
+ result += f"... and {len(declaring_chunks) - 5} more locations\n"
473
 
474
  return result
475
  except Exception as e:
 
478
 
479
  @observe(as_type="tool")
480
  def find_usages(entity_name: str, limit: int = 20) -> str:
481
+ """
482
+ Find all usages or calls of an entity in the codebase.
483
+
484
+ Shows where functions, classes, variables, etc. are used.
485
+
486
+ Args:
487
+ entity_name: The name of the entity to find usages for
488
+ limit: Maximum number of usages to return (default: 20)
489
+
490
+ Returns:
491
+ str: A formatted string with usage locations
492
+ """
493
  if knowledge_graph is None:
494
  return "Error: Knowledge graph not initialized"
495
 
496
  try:
497
+ # Convert limit to int if it's a string (MCP may pass strings)
498
+ if isinstance(limit, str):
499
+ try:
500
+ limit = int(limit)
501
+ except ValueError:
502
+ return f"Error: 'limit' must be an integer, got '{limit}'"
503
+
504
  if entity_name not in knowledge_graph.entities:
505
+ return f"Error: Entity '{entity_name}' not found in knowledge graph"
506
+
507
+ if limit <= 0:
508
+ return "Error: limit must be a positive integer"
509
 
510
  entity_info = knowledge_graph.entities[entity_name]
511
  calling_chunks = entity_info.get('calling_chunk_ids', [])
512
 
513
  if not calling_chunks:
514
+ return f"Entity '{entity_name}' found but no usages identified."
515
 
516
  result = f"Usages of '{entity_name}' ({len(calling_chunks)} total):\n"
517
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
518
 
519
  for i, chunk_id in enumerate(calling_chunks[:limit], 1):
520
+ if chunk_id in knowledge_graph.graph:
521
+ chunk = knowledge_graph.graph.nodes[chunk_id]['data']
522
+ result += f"{i}. {chunk.path} (chunk {chunk.order_in_file})\n"
523
+ result += f" Content:\n{chunk.content}\n\n"
524
+
525
+ if len(calling_chunks) > limit:
526
+ result += f"... and {len(calling_chunks) - limit} more usages\n"
527
 
528
  return result
529
  except Exception as e:
 
532
 
533
  @observe(as_type="tool")
534
  def get_file_structure(file_path: str) -> str:
535
+ """
536
+ Get an overview of the structure of a file.
537
+
538
+ Shows chunks and declared entities within a specific file.
539
+
540
+ Args:
541
+ file_path: The path of the file to get the structure for
542
+
543
+ Returns:
544
+ str: A formatted string with file structure
545
+ """
546
  if knowledge_graph is None:
547
  return "Error: Knowledge graph not initialized"
548
 
 
561
 
562
  if hasattr(file_node, 'declared_entities') and file_node.declared_entities:
563
  result += f"Declared Entities ({len(file_node.declared_entities)}):\n"
564
+ for entity in file_node.declared_entities[:15]:
565
+ if isinstance(entity, dict):
566
+ result += f" - {entity.get('name', '?')} ({entity.get('type', '?')})\n"
567
+ else:
568
+ result += f" - {entity}\n"
569
+ if len(file_node.declared_entities) > 15:
570
+ result += f" ... and {len(file_node.declared_entities) - 15} more\n"
571
 
572
  result += f"\nChunks:\n"
573
  for chunk in chunks[:10]:
574
+ result += f" [{chunk.order_in_file}] {chunk.id}\n"
575
+ if chunk.description:
576
+ desc = chunk.description[:80] + "..." if len(chunk.description) > 80 else chunk.description
577
+ result += f" {desc}\n"
578
+
579
+ if len(chunks) > 10:
580
+ result += f" ... and {len(chunks) - 10} more chunks\n"
581
 
582
  return result
583
  except Exception as e:
 
585
 
586
 
587
  @observe(as_type="tool")
588
def get_related_chunks(chunk_id: str, relation_type: str = "calls") -> str:
    """
    List the chunks reachable from a chunk through one kind of outgoing edge.

    Only edges leaving ``chunk_id`` whose ``relation`` attribute equals
    ``relation_type`` are considered. At most 15 targets are printed; the
    remainder is summarised in a trailing line.

    Args:
        chunk_id: The ID of the chunk to start from
        relation_type: The edge relation to follow (default: 'calls')

    Returns:
        str: A formatted listing of the related chunks, or an error message
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        graph = knowledge_graph.graph
        if chunk_id not in graph:
            return f"Error: Chunk '{chunk_id}' not found in knowledge graph"

        # One (target id, file path, entity name) triple per matching edge.
        hits = [
            (
                dst,
                getattr(graph.nodes[dst]['data'], 'path', 'Unknown'),
                edge.get('entity_name'),
            )
            for _, dst, edge in graph.out_edges(chunk_id, data=True)
            if edge.get('relation') == relation_type
        ]

        if not hits:
            return f"No chunks found with '{relation_type}' relationship from '{chunk_id}'"

        parts = [
            f"Chunks related to '{chunk_id}' via '{relation_type}' ({len(hits)} total):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]
        for position, (dst, file_path, entity) in enumerate(hits[:15], 1):
            parts.append(f"{position}. {dst}\n")
            parts.append(f" File: {file_path}\n")
            if entity:
                parts.append(f" Entity: {entity}\n")
            parts.append("\n")

        overflow = len(hits) - 15
        if overflow > 0:
            parts.append(f"... and {overflow} more\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {e}"
637
+
638
+
639
+ @observe(as_type="tool")
640
def list_all_entities(
    limit: int = 50,
    page: int = 1,
    entity_type: Optional[str] = None,
    declared_in_repo: Optional[bool] = None
) -> str:
    """
    List entities tracked in the knowledge graph, with filtering and pagination.

    For every entity on the requested page the output shows its types, the
    number of declaring chunks and the number of calling chunks.

    Args:
        limit: Maximum number of entities per page; must be positive (default: 50)
        page: 1-indexed page number (default: 1)
        entity_type: Only keep entities having this type (case-insensitive).
            An empty string or 'null' disables the filter.
        declared_in_repo: True keeps only entities with at least one declaring
            chunk, False keeps only entities with none, None keeps all.
            String spellings ('true'/'false'/'1'/'0'/'yes'/'no'/'none'/'null'/
            'all'/'') are accepted because MCP clients may pass strings.

    Returns:
        str: A formatted listing for the requested page, or an error message
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may deliver numeric arguments as strings.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        # A non-positive limit would otherwise divide by zero (or yield a
        # negative page count) in the pagination arithmetic below.
        if limit <= 0:
            return "Error: limit must be a positive integer"

        if entity_type in ("", "null"):
            entity_type = None

        if isinstance(declared_in_repo, str):
            flag = declared_in_repo.strip().lower()
            if flag in ("true", "1", "yes"):
                declared_in_repo = True
            elif flag in ("false", "0", "no"):
                declared_in_repo = False
            elif flag in ("none", "null", "all", ""):
                declared_in_repo = None
            else:
                # Reject unknown spellings rather than letting a non-empty
                # string silently behave like True.
                return (
                    "Error: 'declared_in_repo' must be a boolean (true/false) "
                    f"or empty, got '{declared_in_repo}'"
                )

        if not knowledge_graph.entities:
            return "No entities found in the knowledge graph."

        wanted_type = entity_type.lower() if entity_type is not None else None

        filtered_entities = {}
        for entity_name, info in knowledge_graph.entities.items():
            if wanted_type is not None:
                known_types = [t.lower() if t else '' for t in info.get('type', [])]
                if wanted_type not in known_types:
                    continue

            if declared_in_repo is not None:
                has_declaration = bool(info.get('declaring_chunk_ids', []))
                if has_declaration != bool(declared_in_repo):
                    continue

            filtered_entities[entity_name] = info

        if not filtered_entities:
            filter_desc = []
            if entity_type:
                filter_desc.append(f"type={entity_type}")
            if declared_in_repo is not None:
                filter_desc.append(f"declared_in_repo={declared_in_repo}")
            filter_text = f" (filtered by {', '.join(filter_desc)})" if filter_desc else ""
            return f"No entities found{filter_text}."

        total_entities = len(filtered_entities)
        total_pages = (total_entities + limit - 1) // limit  # ceiling division

        if page > total_pages:
            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total_entities} entities at {limit} per page)"

        start_idx = (page - 1) * limit
        end_idx = start_idx + limit
        paginated_items = list(filtered_entities.items())[start_idx:end_idx]

        parts = [
            f"All Entities (Page {page}/{total_pages}, {total_entities} total):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]

        for i, (entity_name, info) in enumerate(paginated_items, start=start_idx + 1):
            # `t or ''` mirrors the filter above, which also tolerates None types.
            type_names = ', '.join(t or '' for t in info.get('type', ['Unknown']))
            parts.append(f"{i}. {entity_name}\n")
            parts.append(f" Types: {type_names}\n")
            parts.append(f" Declarations: {len(info.get('declaring_chunk_ids', []))}\n")
            parts.append(f" Usages: {len(info.get('calling_chunk_ids', []))}\n\n")

        parts.append("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
        parts.append(f"Showing {start_idx + 1}-{min(end_idx, total_entities)} of {total_entities} entities\n")
        parts.append(f"Page {page} of {total_pages}\n")

        if page < total_pages:
            parts.append(f"Use page={page + 1} to see the next page\n")

        if entity_type:
            parts.append(f"\n(Filtered by type={entity_type})\n")
        if declared_in_repo is not None:
            parts.append(f"(Filtered by declared_in_repo={declared_in_repo})\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
766
 
767
 
768
+ @observe(as_type="tool")
769
def diff_chunks(node_id_1: str, node_id_2: str) -> str:
    """
    Produce a unified diff between the contents of two graph nodes.

    Args:
        node_id_1: ID of the node used as the "from" side of the diff
        node_id_2: ID of the node used as the "to" side of the diff

    Returns:
        str: The unified diff text, a note that the contents are identical,
        or an error message when a node is missing or has no content
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        graph = knowledge_graph.graph

        # Report the first missing node, checking them in argument order.
        for candidate in (node_id_1, node_id_2):
            if candidate not in graph:
                return f"Error: Node '{candidate}' not found in knowledge graph"

        left = getattr(graph.nodes[node_id_1]['data'], 'content', None)
        right = getattr(graph.nodes[node_id_2]['data'], 'content', None)

        if not (left and right):
            return "Error: One or both nodes have no content."

        delta_lines = [
            *difflib.unified_diff(
                left.splitlines(),
                right.splitlines(),
                fromfile=node_id_1,
                tofile=node_id_2,
                lineterm="",
            )
        ]

        if delta_lines:
            return "\n".join(delta_lines)
        return "No differences found between the two chunks."
    except Exception as e:
        return f"Error: {e}"
809
+
810
+
811
  @observe(as_type="tool")
812
def print_tree(root_id: str = "root", max_depth: int = 3) -> str:
    """
    Render the graph below a node as an indented tree.

    Every outgoing edge of a node is treated as a parent-to-child link. At
    most 20 children are expanded per node; the rest are summarised. When
    ``root_id`` is not in the graph, the first node whose type is 'repo',
    'directory' or 'file' is used instead.

    Args:
        root_id: The node ID to start the tree from (default: 'root')
        max_depth: Deepest level to print, the root being level 0 (default: 3)

    Returns:
        str: The formatted tree, or an error message
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may deliver numeric arguments as strings.
        if isinstance(max_depth, str):
            try:
                max_depth = int(max_depth)
            except ValueError:
                return f"Error: 'max_depth' must be an integer, got '{max_depth}'"

        graph = knowledge_graph.graph

        if root_id not in graph:
            fallback_roots = [
                candidate
                for candidate, attrs in graph.nodes(data=True)
                if getattr(attrs['data'], 'node_type', None) in ('repo', 'directory', 'file')
            ]
            if not fallback_roots:
                return f"Error: Node '{root_id}' not found and no suitable root found"
            root_id = fallback_roots[0]

        rows = [
            f"Tree View (starting from '{root_id}', max depth: {max_depth}):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]

        def emit(current, level):
            """Append the line for `current`, then its subtree, to `rows`."""
            if level > max_depth:
                return

            payload = graph.nodes[current]['data']
            label = getattr(payload, 'name', current)
            kind = getattr(payload, 'node_type', '?')
            rows.append(" " * level + f"- {label} ({kind})\n")

            successors = [dst for _, dst in graph.out_edges(current)]
            for successor in successors[:20]:  # cap fan-out to keep output small
                emit(successor, level + 1)

            hidden = len(successors) - 20
            if hidden > 0:
                rows.append(" " * (level + 1) + f"... and {hidden} more\n")

        emit(root_id, 0)
        return "".join(rows)
    except Exception as e:
        return f"Error: {e}"
873
 
874
 
875
  @observe(as_type="tool")
876
def entity_relationships(node_id: str) -> str:
    """
    Show the incoming and outgoing relationships of a node.

    Up to 20 edges are listed per direction, each with its ``relation``
    attribute ('?' when the edge has none). Incoming edges are marked with
    '←' and outgoing edges with '→'.

    Args:
        node_id: The node/entity ID to explore relationships for

    Returns:
        str: A formatted string with the relationships, or an error message
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if node_id not in knowledge_graph.graph:
            return f"Error: Node '{node_id}' not found in knowledge graph"

        g = knowledge_graph.graph

        parts = [
            f"Relationships for '{node_id}':\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]

        incoming = list(g.in_edges(node_id, data=True))
        outgoing = list(g.out_edges(node_id, data=True))

        if incoming:
            parts.append(f"Incoming Relationships ({len(incoming)}):\n")
            for source, _, data in incoming[:20]:
                parts.append(f" ← {source} [{data.get('relation', '?')}]\n")
            if len(incoming) > 20:
                parts.append(f" ... and {len(incoming) - 20} more\n")
            parts.append("\n")

        if outgoing:
            parts.append(f"Outgoing Relationships ({len(outgoing)}):\n")
            for _, target, data in outgoing[:20]:
                # A real right arrow, matching the '←' used for incoming
                # edges (the marker was previously mis-encoded text).
                parts.append(f" → {target} [{data.get('relation', '?')}]\n")
            if len(outgoing) > 20:
                parts.append(f" ... and {len(outgoing) - 20} more\n")

        if not incoming and not outgoing:
            parts.append("No relationships found.\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
924
+
925
+
926
+ @observe(as_type="tool")
927
def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10, fuzzy: bool = True) -> str:
    """
    Search for nodes/entities by type and by (optionally fuzzy) name match.

    Name matching is case-insensitive. With ``fuzzy`` enabled a name matches
    when it contains the query as a substring or contains all query
    characters in order (so 'Embedding' finds 'BertEmbeddings'). With
    ``fuzzy`` disabled only the substring match is used.

    Nodes whose ``node_type`` is 'entity' are filtered by their entity type
    (case-insensitive). When the node carries no ``entity_type``, the types
    recorded in ``knowledge_graph.entities`` are consulted and any of them
    may satisfy the filter. All other nodes are filtered by exact
    ``node_type`` equality.

    Results are ordered exact match first, then substring matches, then
    fuzzy-only matches, alphabetically within each group.

    Args:
        node_type: Type of node/entity (e.g., 'function', 'class', 'file', 'chunk', 'directory')
        name_query: Text to look for in the node name
        limit: Maximum results to return; must be positive (default: 10)
        fuzzy: Enable in-order character matching (default: True)

    Returns:
        str: A formatted string with matching nodes, or an error message
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may deliver arguments as strings.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        if isinstance(fuzzy, str):
            fuzzy = fuzzy.lower() in ('true', '1', 'yes')

        if limit <= 0:
            return "Error: limit must be a positive integer"

        g = knowledge_graph.graph
        query_lower = name_query.lower()

        # Subsequence pattern: every query character, in order, with anything
        # in between (e.g. "ebd" matches "embedding").
        fuzzy_regex = None
        if fuzzy:
            fuzzy_pattern = '.*'.join(re.escape(c) for c in query_lower)
            fuzzy_regex = re.compile(fuzzy_pattern, re.IGNORECASE)

        matches = []
        for nid, attrs in g.nodes(data=True):
            node = attrs['data']
            node_name = getattr(node, 'name', '')
            if not node_name:
                continue

            name_lower = node_name.lower()
            is_substring = query_lower in name_lower
            if not is_substring and not (fuzzy_regex is not None and fuzzy_regex.search(node_name)):
                continue

            current_node_type = getattr(node, 'node_type', None)

            if current_node_type == 'entity':
                wanted = node_type.lower()
                entity_type = getattr(node, 'entity_type', '')

                # Fallback for entity nodes without an entity_type: use the
                # entities dictionary. Accept ANY recorded type rather than
                # only the first, so multi-typed entities are not missed.
                if not entity_type and nid in knowledge_graph.entities:
                    recorded = [t for t in knowledge_graph.entities[nid].get('type', []) if t]
                    entity_type = next(
                        (t for t in recorded if t.lower() == wanted),
                        recorded[0] if recorded else '',
                    )

                if not entity_type or entity_type.lower() != wanted:
                    continue
                shown_type = f"entity ({entity_type})"
            elif current_node_type == node_type:
                shown_type = current_node_type
            else:
                continue

            # 0 = exact name, 1 = substring, 2 = fuzzy-only.
            score = 0 if query_lower == name_lower else (1 if is_substring else 2)
            matches.append({
                "id": nid,
                "name": node_name,
                "type": shown_type,
                "score": score
            })

        matches.sort(key=lambda x: (x['score'], x['name'].lower()))
        matches = matches[:limit]

        if not matches:
            return f"No matches for type '{node_type}' and name containing '{name_query}'."

        parts = [
            f"Matches for type '{node_type}' and name '{name_query}' ({len(matches)} results):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]
        for i, match in enumerate(matches, 1):
            parts.append(f"{i}. {match['name']}\n")
            parts.append(f" ID: {match['id']}\n")
            parts.append(f" Type: {match['type']}\n\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
1047
+
1048
+
1049
+ @observe(as_type="tool")
1050
def get_chunk_context(node_id: str) -> str:
    """
    Return a chunk's content together with its neighbouring chunks.

    The previous chunk (if any), the chunk itself and the next chunk (if any)
    are collected in that order, grouped by file via
    ``organize_chunks_by_file_name`` and merged with ``join_organized_chunks``.

    Args:
        node_id: The node/chunk ID to get context for

    Returns:
        str: The joined content of the surrounding chunks, or an error message
    """
    from pedagogia_graph_code_repo.RepoKnowledgeGraphLib.utils.chunk_utils import (
        join_organized_chunks,
        organize_chunks_by_file_name,
    )

    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if node_id not in knowledge_graph.graph:
            return f"Error: Node '{node_id}' not found in knowledge graph"

        center = knowledge_graph.graph.nodes[node_id]['data']
        before = knowledge_graph.get_previous_chunk(node_id)
        after = knowledge_graph.get_next_chunk(node_id)

        # The centre chunk is always included; neighbours only when present.
        window = [
            *([before] if before else []),
            center,
            *([after] if after else []),
        ]

        return join_organized_chunks(organize_chunks_by_file_name(window))
    except Exception as e:
        return f"Error: {e}"
1094
+
1095
 
1096
+ @observe(as_type="tool")
1097
+ def get_file_stats(path: str) -> str:
1098
+ """
1099
+ Get statistics for a file or directory.
1100
+
1101
+ Shows number of entities, lines, chunks, etc.
1102
+
1103
+ Args:
1104
+ path: The file or directory path to get statistics for
1105
 
1106
+ Returns:
1107
+ str: A formatted string with file statistics
1108
+ """
1109
+ if knowledge_graph is None:
1110
+ return "Error: Knowledge graph not initialized"
1111
 
1112
+ try:
1113
+ g = knowledge_graph.graph
1114
+ nodes = [n for n, d in g.nodes(data=True) if getattr(d['data'], 'path', None) == path]
1115
+
1116
+ if not nodes:
1117
+ return f"No nodes found for path '{path}'."
1118
+
1119
+ result = f"Statistics for '{path}':\n"
1120
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
1121
+
1122
+ for node_id in nodes:
1123
+ node = g.nodes[node_id]['data']
1124
+ content = getattr(node, 'content', '')
1125
+ declared = getattr(node, 'declared_entities', [])
1126
+ called = getattr(node, 'called_entities', [])
1127
+ chunks = [t for s, t in g.out_edges(node_id)
1128
+ if getattr(g.nodes[t]['data'], 'node_type', None) == 'chunk']
1129
+
1130
+ result += f"Node: {node_id} ({getattr(node, 'node_type', '?')})\n"
1131
+ result += f" Lines: {len(content.splitlines()) if content else 0}\n"
1132
+ result += f" Declared entities: {len(declared)}\n"
1133
+
1134
+ if declared:
1135
+ for entity in declared[:10]:
1136
+ if isinstance(entity, dict):
1137
+ result += f" - {entity.get('name', '?')} ({entity.get('type', '?')})\n"
1138
+ else:
1139
+ result += f" - {entity}\n"
1140
+ if len(declared) > 10:
1141
+ result += f" ... and {len(declared) - 10} more\n"
1142
+
1143
+ result += f" Called entities: {len(called)}\n"
1144
+ if called:
1145
+ for entity in called[:10]:
1146
+ result += f" - {entity}\n"
1147
+ if len(called) > 10:
1148
+ result += f" ... and {len(called) - 10} more\n"
1149
+
1150
+ result += f" Chunks: {len(chunks)}\n\n"
1151
 
1152
  return result
1153
  except Exception as e:
 
1155
 
1156
 
1157
  @observe(as_type="tool")
1158
+ def find_path(source_id: str, target_id: str, max_depth: int = 5) -> str:
1159
+ """
1160
+ Find the shortest path between two nodes in the knowledge graph.
1161
+
1162
+ Uses graph traversal to find connections between nodes.
1163
+
1164
+ Args:
1165
+ source_id: The ID of the source node
1166
+ target_id: The ID of the target node
1167
+ max_depth: Maximum depth to search for a path (default: 5)
1168
+
1169
+ Returns:
1170
+ str: A formatted string showing the path
1171
+ """
1172
  if knowledge_graph is None:
1173
  return "Error: Knowledge graph not initialized"
1174
 
1175
  try:
1176
+ # Convert max_depth to int if it's a string (MCP may pass strings)
1177
+ if isinstance(max_depth, str):
1178
+ try:
1179
+ max_depth = int(max_depth)
1180
+ except ValueError:
1181
+ return f"Error: 'max_depth' must be an integer, got '{max_depth}'"
1182
+
1183
+ path_result = knowledge_graph.find_path(source_id, target_id, max_depth)
1184
 
1185
+ if "error" in path_result:
1186
+ return f"Error: {path_result['error']}"
 
 
 
 
 
1187
 
1188
+ if not path_result.get("path"):
1189
+ return f"No path found from '{source_id}' to '{target_id}' within depth {max_depth}"
1190
 
1191
+ result = f"Path from '{source_id}' to '{target_id}':\n"
1192
  result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
1193
+ result += f"Length: {path_result['length']}\n\n"
1194
+
1195
+ path = path_result['path']
1196
+ for i, node_id in enumerate(path):
1197
+ result += f"{i}. {node_id}\n"
1198
+ if i < len(path) - 1:
1199
+ result += " ↓\n"
1200
 
1201
  return result
1202
  except Exception as e:
 
1204
 
1205
 
1206
  @observe(as_type="tool")
1207
def get_subgraph(node_id: str, depth: int = 2, edge_types: Optional[str] = None) -> str:
    """
    Extract a subgraph around a node up to a specified depth.

    Delegates to ``knowledge_graph.get_subgraph`` and formats its result:
    node and edge counts followed by up to 30 node entries.

    Args:
        node_id: The ID of the central node
        depth: The depth/radius of the subgraph to extract (default: 2)
        edge_types: Optional comma-separated list of edge types
            (e.g., 'calls,contains'). Whitespace around each item is ignored
            and empty items are dropped; if nothing remains, no filter is
            applied.

    Returns:
        str: A formatted string describing the subgraph, or an error message
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may deliver numeric arguments as strings.
        if isinstance(depth, str):
            try:
                depth = int(depth)
            except ValueError:
                return f"Error: 'depth' must be an integer, got '{depth}'"

        # Normalise "calls, contains," -> ["calls", "contains"] so that stray
        # spaces or trailing commas do not produce filters that match nothing.
        edge_types_list = None
        if edge_types:
            cleaned = [item.strip() for item in edge_types.split(",")]
            edge_types_list = [item for item in cleaned if item] or None

        subgraph_result = knowledge_graph.get_subgraph(node_id, depth, edge_types_list)

        if "error" in subgraph_result:
            return f"Error: {subgraph_result['error']}"

        nodes = subgraph_result['nodes']
        parts = [
            f"Subgraph around '{node_id}' (depth: {depth}):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
            f"Nodes: {subgraph_result['node_count']}\n",
            f"Edges: {subgraph_result['edge_count']}\n",
        ]

        if edge_types_list:
            parts.append(f"Filtered by edge types: {', '.join(edge_types_list)}\n")

        parts.append("\nNodes in subgraph:\n")
        parts.extend(f" - {node}\n" for node in nodes[:30])

        if len(nodes) > 30:
            parts.append(f" ... and {len(nodes) - 30} more\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
1256
+
1257
+
1258
@observe(as_type="tool")
def list_files_in_directory(directory_path: str = "", pattern: str = "*", recursive: bool = True, limit: int = 50) -> str:
    """
    List files in a directory with optional glob pattern matching.

    This provides hierarchical file listing, showing files within directories
    rather than just top-level files. Supports glob patterns for filtering.
    Matches are sorted by path before the limit is applied, so the same query
    always returns the same (first) files.

    Args:
        directory_path: Path to the directory to list (empty string for root/all files)
        pattern: Glob pattern to filter files (e.g., '*.py', 'test_*.py', '**/*.js')
        recursive: Whether to search recursively in subdirectories (default: True).
            When False, only files directly inside the directory are listed
            (for an empty directory_path: files with no parent directory)
        limit: Maximum number of files to return (default: 50)

    Returns:
        str: A formatted string with matching files, or a message starting
            with 'Error:' on failure
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # Coerce limit to int (MCP may pass strings, UI widgets may pass floats);
        # it is used as a slice bound below, which requires a real int.
        if not isinstance(limit, int):
            try:
                limit = int(limit)
            except (TypeError, ValueError):
                return f"Error: 'limit' must be an integer, got '{limit}'"
        if limit < 1:
            return f"Error: 'limit' must be a positive integer, got '{limit}'"

        # Convert recursive to bool if it's a string
        if isinstance(recursive, str):
            recursive = recursive.lower() in ('true', '1', 'yes')

        base_dir = directory_path.rstrip('/') if directory_path else ''
        use_pattern = bool(pattern) and pattern != '*'
        matching_files = []

        for nid, n in knowledge_graph.graph.nodes(data=True):
            node = n['data']

            # Only look at file nodes
            if getattr(node, 'node_type', None) != 'file':
                continue

            # Fall back to the node id / empty name when the attribute is missing or None.
            file_path = getattr(node, 'path', nid) or nid
            file_name = getattr(node, 'name', '') or ''

            if recursive:
                # Keep files anywhere under the directory (or the path itself).
                if directory_path:
                    if not file_path.startswith(base_dir + '/') and file_path != directory_path:
                        continue
            else:
                # Keep only files directly inside the directory. This also
                # applies to the root (empty directory_path), where the
                # parent directory of a top-level file is ''.
                parent_dir = file_path.rpartition('/')[0]
                if parent_dir != base_dir:
                    continue

            # Apply glob pattern matching against both full path and filename.
            # NOTE: fnmatch's '*' also matches '/', so '**/' adds no special
            # recursion semantics; it only allows patterns to match at any depth.
            if use_pattern and not (
                fnmatch.fnmatch(file_path, pattern)
                or fnmatch.fnmatch(file_name, pattern)
                or fnmatch.fnmatch(file_path, f'**/{pattern}')
            ):
                continue

            declared_entities = getattr(node, 'declared_entities', []) or []
            matching_files.append({
                'path': file_path,
                'name': file_name,
                'language': getattr(node, 'language', 'Unknown'),
                'entity_count': len(declared_entities),
            })

        if not matching_files:
            filter_desc = f" in '{directory_path}'" if directory_path else ""
            pattern_desc = f" matching '{pattern}'" if use_pattern else ""
            return f"No files found{filter_desc}{pattern_desc}."

        # Sort first, then truncate, so the limit keeps a deterministic prefix
        # instead of whatever the graph happened to yield first.
        matching_files.sort(key=lambda x: x['path'])
        shown = matching_files[:limit]
        hidden = len(matching_files) - len(shown)

        lines = ["Files"]
        if directory_path:
            lines.append(f" in '{directory_path}'")
        if use_pattern:
            lines.append(f" matching '{pattern}'")
        lines.append(f" ({len(shown)} results):\n")
        lines.append("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n")

        for i, f in enumerate(shown, 1):
            lines.append(f"{i}. {f['path']}\n")
            lines.append(f"   Language: {f['language']}, Entities: {f['entity_count']}\n\n")

        if hidden > 0:
            lines.append(f"... and {hidden} more\n")

        return "".join(lines)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
1360
+
1361
+
1362
@observe(as_type="tool")
def find_classes_inheriting_from(base_class_name: str, limit: int = 20) -> str:
    """
    Find all classes that inherit from a given base class.

    Searches the knowledge graph for class entities that have the specified
    base class in their inheritance chain. Two strategies are used per class:

    1. An outgoing edge whose relation is one of 'inherits', 'extends',
       'inherits_from' or 'base_class' and whose target name contains the
       base class name (case-insensitive substring match).
    2. Otherwise, if the base name appears among the class's called entities,
       the content of its first declaring chunk is scanned for a
       'class Name(...Base' definition.

    Each class node is reported at most once; distinct classes that happen to
    share a name are all reported.

    Args:
        base_class_name: The name of the base class to find subclasses of
        limit: Maximum number of results to return (default: 20)

    Returns:
        str: A formatted string with classes inheriting from the base class,
            or a message starting with 'Error:' on failure
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # Convert limit to int if it's a string
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"
        if limit < 1:
            return f"Error: 'limit' must be a positive integer, got '{limit}'"

        # An empty name would substring-match every class in the graph.
        base_class_name = (base_class_name or '').strip()
        if not base_class_name:
            return "Error: 'base_class_name' must not be empty"

        g = knowledge_graph.graph
        base_lower = base_class_name.lower()
        escaped_base = re.escape(base_class_name)  # loop-invariant, hoisted
        inheritance_relations = ('inherits', 'extends', 'inherits_from', 'base_class')
        inheriting_classes = []

        for nid, n in g.nodes(data=True):
            node = n['data']

            # Only class entities are candidates. 'or' guards against
            # attributes that exist but are None.
            if getattr(node, 'node_type', None) != 'entity':
                continue
            if (getattr(node, 'entity_type', '') or '').lower() != 'class':
                continue

            class_name = getattr(node, 'name', '') or ''
            declaring_chunks = getattr(node, 'declaring_chunk_ids', []) or []
            match = None

            # Strategy 1: explicit inheritance edges.
            for _, target, edge_data in g.out_edges(nid, data=True):
                relation = (edge_data.get('relation', '') or '').lower()
                if relation not in inheritance_relations:
                    continue
                target_name = getattr(g.nodes[target]['data'], 'name', '') or ''
                if base_lower in target_name.lower():
                    match = {
                        'name': class_name,
                        'id': nid,
                        'base': target_name,
                        'file': declaring_chunks[0] if declaring_chunks else 'Unknown',
                    }
                    break

            # Strategy 2: inheritance is sometimes only visible through the
            # calls relationship, so confirm it against the class definition
            # text in the first declaring chunk.
            if match is None and declaring_chunks:
                called = getattr(node, 'called_entities', []) or []
                chunk_id = declaring_chunks[0]
                if chunk_id in g and any(base_lower in str(c).lower() for c in called):
                    content = getattr(g.nodes[chunk_id]['data'], 'content', '') or ''
                    class_pattern = rf'class\s+{re.escape(class_name)}\s*\([^)]*{escaped_base}'
                    if re.search(class_pattern, content, re.IGNORECASE):
                        match = {
                            'name': class_name,
                            'id': nid,
                            'base': base_class_name,
                            'file': chunk_id,
                        }

            if match is not None:
                inheriting_classes.append(match)
                if len(inheriting_classes) >= limit:
                    break

        if not inheriting_classes:
            return f"No classes found inheriting from '{base_class_name}'.\n\nTip: Try searching for the base class name in code content using search_nodes."

        lines = [
            f"Classes inheriting from '{base_class_name}' ({len(inheriting_classes)} results):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]

        for i, cls in enumerate(inheriting_classes, 1):
            lines.append(f"{i}. {cls['name']}\n")
            lines.append(f"   ID: {cls['id']}\n")
            lines.append(f"   Inherits from: {cls['base']}\n")
            lines.append(f"   Defined in: {cls['file']}\n\n")

        return "".join(lines)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
1461
+
1462
+
1463
@observe(as_type="tool")
def find_files_importing(module_or_entity: str, limit: int = 30) -> str:
    """
    Find all files that import a specific module or entity.

    Searches for import statements and usage patterns across the codebase.
    A file matches when either:

    - one of its called entities contains the search term (case-insensitive
      substring; reported as match type 'called_entity'), or
    - one of its first three chunks contains an import-like statement
      ('import', 'from ... import', 'require(' or 'use') mentioning the term
      (reported as match type 'import_statement').

    At most 'limit' files are collected; the collected files are then sorted
    by path.

    Args:
        module_or_entity: The name of the module or entity to find imports of
        limit: Maximum number of results to return (default: 30)

    Returns:
        str: A formatted string with files that import the specified
            module/entity, or a message starting with 'Error:' on failure
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # Convert limit to int if it's a string
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"
        if limit < 1:
            return f"Error: 'limit' must be a positive integer, got '{limit}'"

        # An empty term would substring-match every called entity.
        if not module_or_entity:
            return "Error: 'module_or_entity' must not be empty"

        search_term = module_or_entity.lower()

        # The patterns depend only on the search term, so compile them once
        # instead of rebuilding them for every chunk of every file.
        escaped = re.escape(module_or_entity)
        import_patterns = [
            re.compile(expr, re.IGNORECASE)
            for expr in (
                rf'import\s+.*{escaped}',
                rf'from\s+.*{escaped}.*\s+import',
                rf'require\s*\(\s*["\'].*{escaped}',
                rf'use\s+.*{escaped}',
            )
        ]
        can_read_chunks = hasattr(knowledge_graph, 'get_chunks_of_file')

        importing_files = []
        seen_paths = set()  # O(1) duplicate check for the import-statement path

        for nid, n in knowledge_graph.graph.nodes(data=True):
            # Enforce the limit before every candidate so that neither match
            # path can push the result list past it.
            if len(importing_files) >= limit:
                break

            node = n['data']
            if getattr(node, 'node_type', None) != 'file':
                continue

            file_path = getattr(node, 'path', nid)

            # Check if the module/entity is in called entities
            matched_entities = []
            for entity in getattr(node, 'called_entities', []) or []:
                if isinstance(entity, dict):
                    entity_str = (entity.get('name', '') or '').lower()
                else:
                    entity_str = str(entity).lower()
                if search_term in entity_str:
                    matched_entities.append(entity_str)

            if matched_entities:
                match_type = 'called_entity'
            elif can_read_chunks and file_path not in seen_paths:
                # Also check chunk contents for import statements; only the
                # first few chunks are scanned (usually where imports are).
                chunks = knowledge_graph.get_chunks_of_file(file_path) or []
                has_import = any(
                    regex.search(getattr(chunk, 'content', '') or '')
                    for chunk in chunks[:3]
                    for regex in import_patterns
                )
                if not has_import:
                    continue
                match_type = 'import_statement'
            else:
                continue

            seen_paths.add(file_path)
            importing_files.append({
                'path': file_path,
                'name': getattr(node, 'name', ''),
                'matched_entities': matched_entities[:5],
                'match_type': match_type,
            })

        # Sort by path
        importing_files.sort(key=lambda x: x['path'])

        if not importing_files:
            return f"No files found importing '{module_or_entity}'.\n\nTip: Try searching for the module name in code content using search_nodes."

        lines = [
            f"Files importing '{module_or_entity}' ({len(importing_files)} results):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]

        for i, f in enumerate(importing_files, 1):
            lines.append(f"{i}. {f['path']}\n")
            lines.append(f"   Match type: {f['match_type']}\n")
            if f['matched_entities']:
                lines.append(f"   Matched: {', '.join(f['matched_entities'][:3])}\n")
            lines.append("\n")

        return "".join(lines)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
1565
 
1566
+
1567
+ @observe(as_type="tool")
1568
+ def get_concept_overview(concept: str, limit: int = 15) -> str:
1569
+ """
1570
+ Get a high-level overview of a concept across the codebase.
1571
+
1572
+ Combines multiple search strategies to provide a comprehensive view of how
1573
+ a concept (like 'embeddings', 'authentication', 'caching') is implemented.
1574
+
1575
+ Args:
1576
+ concept: The concept to search for (e.g., 'embedding', 'authentication', 'cache')
1577
+ limit: Maximum number of results per category (default: 15)
1578
+
1579
+ Returns:
1580
+ str: A formatted overview of the concept across the codebase
1581
+ """
1582
+ if knowledge_graph is None:
1583
+ return "Error: Knowledge graph not initialized"
1584
+
1585
+ try:
1586
+ # Convert limit to int if it's a string
1587
+ if isinstance(limit, str):
1588
+ try:
1589
+ limit = int(limit)
1590
+ except ValueError:
1591
+ return f"Error: 'limit' must be an integer, got '{limit}'"
1592
+
1593
+ g = knowledge_graph.graph
1594
+ concept_lower = concept.lower()
1595
+
1596
+ # Categories to collect
1597
+ related_classes = []
1598
+ related_functions = []
1599
+ related_files = []
1600
+ related_chunks = []
1601
+
1602
+ # Search through all nodes
1603
+ for nid, n in g.nodes(data=True):
1604
+ node = n['data']
1605
+ node_type = getattr(node, 'node_type', None)
1606
+ node_name = getattr(node, 'name', '')
1607
+
1608
+ # Check if concept appears in name
1609
+ name_match = concept_lower in node_name.lower()
1610
+
1611
+ if node_type == 'entity':
1612
+ entity_type = getattr(node, 'entity_type', '')
1613
+ if name_match:
1614
+ if entity_type.lower() == 'class' and len(related_classes) < limit:
1615
+ declaring = getattr(node, 'declaring_chunk_ids', [])
1616
+ related_classes.append({
1617
+ 'name': node_name,
1618
+ 'id': nid,
1619
+ 'file': declaring[0] if declaring else 'Unknown'
1620
+ })
1621
+ elif entity_type.lower() in ('function', 'method') and len(related_functions) < limit:
1622
+ declaring = getattr(node, 'declaring_chunk_ids', [])
1623
+ related_functions.append({
1624
+ 'name': node_name,
1625
+ 'id': nid,
1626
+ 'type': entity_type,
1627
+ 'file': declaring[0] if declaring else 'Unknown'
1628
+ })
1629
+
1630
+ elif node_type == 'file' and len(related_files) < limit:
1631
+ # Check if concept in filename or path
1632
+ file_path = getattr(node, 'path', '')
1633
+ if concept_lower in file_path.lower() or name_match:
1634
+ declared = getattr(node, 'declared_entities', [])
1635
+ related_files.append({
1636
+ 'path': file_path,
1637
+ 'name': node_name,
1638
+ 'entity_count': len(declared)
1639
+ })
1640
+
1641
+ elif node_type == 'chunk' and len(related_chunks) < limit // 2:
1642
+ # Check if concept in chunk content or description
1643
+ content = getattr(node, 'content', '')
1644
+ description = getattr(node, 'description', '')
1645
+ if concept_lower in content.lower() or concept_lower in (description or '').lower():
1646
+ file_path = getattr(node, 'path', '')
1647
+ related_chunks.append({
1648
+ 'id': nid,
1649
+ 'file': file_path,
1650
+ 'content': content
1651
+ })
1652
+
1653
+ # Build the overview
1654
+ result = f"Concept Overview: '{concept}'\n"
1655
+ result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
1656
+
1657
+ # Summary
1658
+ total = len(related_classes) + len(related_functions) + len(related_files) + len(related_chunks)
1659
+ result += f"Found {total} related items across the codebase.\n\n"
1660
+
1661
+ if related_classes:
1662
+ result += f"πŸ“¦ Related Classes ({len(related_classes)}):\n"
1663
+ for cls in related_classes[:10]:
1664
+ result += f" β€’ {cls['name']}\n"
1665
+ result += f" File: {cls['file']}\n"
1666
+ if len(related_classes) > 10:
1667
+ result += f" ... and {len(related_classes) - 10} more\n"
1668
+ result += "\n"
1669
+
1670
+ if related_functions:
1671
+ result += f"⚑ Related Functions/Methods ({len(related_functions)}):\n"
1672
+ for func in related_functions[:10]:
1673
+ result += f" β€’ {func['name']} ({func['type']})\n"
1674
+ result += f" File: {func['file']}\n"
1675
+ if len(related_functions) > 10:
1676
+ result += f" ... and {len(related_functions) - 10} more\n"
1677
+ result += "\n"
1678
+
1679
+ if related_files:
1680
+ result += f"πŸ“„ Related Files ({len(related_files)}):\n"
1681
+ for f in related_files[:10]:
1682
+ result += f" β€’ {f['path']}\n"
1683
+ result += f" Entities: {f['entity_count']}\n"
1684
+ if len(related_files) > 10:
1685
+ result += f" ... and {len(related_files) - 10} more\n"
1686
+ result += "\n"
1687
+
1688
+ if related_chunks:
1689
+ result += f"πŸ“ Code Snippets ({len(related_chunks)}):\n"
1690
+ for chunk in related_chunks[:5]:
1691
+ result += f" β€’ {chunk['id']}\n"
1692
+ result += f" Content:\n{chunk['content']}\n\n"
1693
+ if len(related_chunks) > 5:
1694
+ result += f" ... and {len(related_chunks) - 5} more\n"
1695
+
1696
+ if total == 0:
1697
+ result += "No direct matches found.\n\n"
1698
+ result += "Suggestions:\n"
1699
+ result += f" β€’ Try searching with: search_nodes('{concept}')\n"
1700
+ result += f" β€’ Try partial name: search_by_type_and_name('class', '{concept[:4]}')\n"
1701
+ result += f" β€’ Check entity list: list_all_entities(entity_type='class')\n"
1702
 
1703
  return result
1704
  except Exception as e:
 
1710
  def create_gradio_app():
1711
  """Create and configure the Gradio interface."""
1712
 
1713
+ with gr.Blocks(title="Transformers Knowledge Graph Explorer β€” Knowledge Graph MCP Server", theme=gr.themes.Soft()) as demo:
1714
+ # Helper to render tool docstrings in the UI
1715
+ def _tool_doc_md(func):
1716
+ doc = (func.__doc__ or "No description available.").strip()
1717
+ # Render as a fenced code block for readability
1718
+ return f"**Description:**\n\n```\n{doc}\n```"
1719
+
1720
  gr.Markdown("""
1721
+ # πŸ” Transformers Knowledge Graph Explorer
1722
+
1723
+ Explore and query the Hugging Face Transformers library codebase using a knowledge graph.
1724
  """)
1725
 
1726
  with gr.Tab("πŸ“Š Graph Overview"):
1727
  stats_btn = gr.Button("Get Graph Statistics", variant="primary")
1728
  stats_output = gr.Textbox(label="Statistics", lines=20, max_lines=30)
1729
  stats_btn.click(fn=get_graph_stats, outputs=stats_output)
1730
+ gr.Markdown(_tool_doc_md(get_graph_stats))
1731
 
1732
  with gr.Tab("πŸ”Ž Search"):
1733
  with gr.Row():
 
1738
  with gr.Column():
1739
  search_output = gr.Textbox(label="Search Results", lines=20, max_lines=30)
1740
  search_btn.click(fn=search_nodes, inputs=[search_query, search_limit], outputs=search_output)
1741
+ gr.Markdown(_tool_doc_md(search_nodes))
1742
 
1743
  with gr.Tab("πŸ“ Node Info"):
1744
  with gr.Row():
 
1750
  node_output = gr.Textbox(label="Node Information", lines=20, max_lines=30)
1751
  node_info_btn.click(fn=get_node_info, inputs=node_id_input, outputs=node_output)
1752
  node_edges_btn.click(fn=get_node_edges, inputs=node_id_input, outputs=node_output)
1753
+ gr.Markdown(_tool_doc_md(get_node_info))
1754
+ gr.Markdown(_tool_doc_md(get_node_edges))
1755
 
1756
  with gr.Tab("πŸ—οΈ Structure"):
1757
  gr.Markdown("### Repository Tree")
 
1763
  with gr.Column():
1764
  tree_output = gr.Textbox(label="Tree View", lines=20, max_lines=40)
1765
  tree_btn.click(fn=print_tree, inputs=[tree_root, tree_depth], outputs=tree_output)
1766
+ gr.Markdown(_tool_doc_md(print_tree))
1767
 
1768
  gr.Markdown("---")
1769
  gr.Markdown("### File Structure")
 
1774
  with gr.Column():
1775
  file_structure_output = gr.Textbox(label="File Structure", lines=20, max_lines=30)
1776
  file_structure_btn.click(fn=get_file_structure, inputs=file_path_input, outputs=file_structure_output)
1777
+ gr.Markdown(_tool_doc_md(get_file_structure))
1778
 
1779
  with gr.Tab("🎯 Entities"):
1780
+ gr.Markdown("### List All Entities")
1781
+ with gr.Row():
1782
+ with gr.Column():
1783
+ entity_page = gr.Slider(1, 100, value=1, step=1, label="Page")
1784
+ entity_limit = gr.Slider(10, 100, value=50, step=10, label="Per Page")
1785
+ entity_type_filter = gr.Dropdown(
1786
+ choices=["", "class", "function", "method", "variable", "parameter"],
1787
+ label="Filter by Type (optional)", value=""
1788
+ )
1789
+ declared_in_repo = gr.Dropdown(
1790
+ choices=["", "true", "false"],
1791
+ label="Declared in Repo (optional)",
1792
+ value=""
1793
+ )
1794
+ list_entities_btn = gr.Button("List Entities", variant="primary")
1795
+ with gr.Column():
1796
+ list_entities_output = gr.Textbox(label="Entities", lines=20, max_lines=30)
1797
+ list_entities_btn.click(
1798
+ fn=list_all_entities,
1799
+ inputs=[entity_limit, entity_page, entity_type_filter, declared_in_repo],
1800
+ outputs=list_entities_output,
1801
+ )
1802
+ gr.Markdown(_tool_doc_md(list_all_entities))
1803
+
1804
+ gr.Markdown("---")
1805
  gr.Markdown("### Go to Definition")
1806
  with gr.Row():
1807
  with gr.Column():
 
1810
  with gr.Column():
1811
  def_output = gr.Textbox(label="Definition", lines=15, max_lines=25)
1812
  def_btn.click(fn=go_to_definition, inputs=entity_name_def, outputs=def_output)
1813
+ gr.Markdown(_tool_doc_md(go_to_definition))
1814
 
1815
  gr.Markdown("---")
1816
  gr.Markdown("### Find Usages")
 
1822
  with gr.Column():
1823
  usage_output = gr.Textbox(label="Usages", lines=15, max_lines=25)
1824
  usage_btn.click(fn=find_usages, inputs=[entity_name_usage, usage_limit], outputs=usage_output)
1825
+ gr.Markdown(_tool_doc_md(find_usages))
1826
 
1827
  with gr.Tab("πŸ”¬ Discovery"):
1828
  gr.Markdown("### List Nodes by Type")
 
1837
  with gr.Column():
1838
  type_output = gr.Textbox(label="Results", lines=20, max_lines=30)
1839
  type_btn.click(fn=list_nodes_by_type, inputs=[node_type_input, type_limit], outputs=type_output)
1840
+ gr.Markdown(_tool_doc_md(list_nodes_by_type))
1841
 
1842
  gr.Markdown("---")
1843
  gr.Markdown("### Search by Type and Name")
 
1852
  with gr.Column():
1853
  search_type_output = gr.Textbox(label="Results", lines=20, max_lines=30)
1854
  search_type_btn.click(fn=search_by_type_and_name, inputs=[search_type, search_name], outputs=search_type_output)
1855
+ gr.Markdown(_tool_doc_md(search_by_type_and_name))
1856
 
1857
  with gr.Tab("πŸ”— Relationships"):
1858
  gr.Markdown("### Get Neighbors")
 
1863
  with gr.Column():
1864
  neighbor_output = gr.Textbox(label="Neighbors", lines=20, max_lines=30)
1865
  neighbor_btn.click(fn=get_neighbors, inputs=neighbor_node_id, outputs=neighbor_output)
1866
+ gr.Markdown(_tool_doc_md(get_neighbors))
1867
+
1868
+ gr.Markdown("---")
1869
+ gr.Markdown("### Entity Relationships")
1870
+ with gr.Row():
1871
+ with gr.Column():
1872
+ rel_node_id = gr.Textbox(label="Node ID", placeholder="Enter node ID...")
1873
+ rel_btn = gr.Button("Get Relationships", variant="primary")
1874
+ with gr.Column():
1875
+ rel_output = gr.Textbox(label="Relationships", lines=20, max_lines=30)
1876
+ rel_btn.click(fn=entity_relationships, inputs=rel_node_id, outputs=rel_output)
1877
+ gr.Markdown(_tool_doc_md(entity_relationships))
1878
+
1879
+ gr.Markdown("---")
1880
+ gr.Markdown("### Get Related Chunks")
1881
+ with gr.Row():
1882
+ with gr.Column():
1883
+ related_chunk_id = gr.Textbox(label="Chunk ID", placeholder="Enter chunk ID...")
1884
+ relation_type = gr.Dropdown(choices=["calls", "contains", "declares", "uses"], label="Relation Type", value="calls")
1885
+ related_btn = gr.Button("Get Related Chunks", variant="primary")
1886
+ with gr.Column():
1887
+ related_output = gr.Textbox(label="Related Chunks", lines=20, max_lines=30)
1888
+ related_btn.click(fn=get_related_chunks, inputs=[related_chunk_id, relation_type], outputs=related_output)
1889
+ gr.Markdown(_tool_doc_md(get_related_chunks))
1890
+
1891
+ gr.Markdown("---")
1892
+ gr.Markdown("### Find Path Between Nodes")
1893
+ with gr.Row():
1894
+ with gr.Column():
1895
+ path_source = gr.Textbox(label="Source Node ID", placeholder="Enter source node ID...")
1896
+ path_target = gr.Textbox(label="Target Node ID", placeholder="Enter target node ID...")
1897
+ path_depth = gr.Slider(1, 10, value=5, step=1, label="Max Depth")
1898
+ path_btn = gr.Button("Find Path", variant="primary")
1899
+ with gr.Column():
1900
+ path_output = gr.Textbox(label="Path", lines=20, max_lines=30)
1901
+ path_btn.click(fn=find_path, inputs=[path_source, path_target, path_depth], outputs=path_output)
1902
+ gr.Markdown(_tool_doc_md(find_path))
1903
 
1904
  gr.Markdown("---")
1905
  gr.Markdown("### Find Classes Inheriting From")
 
1910
  with gr.Column():
1911
  inherit_output = gr.Textbox(label="Inheriting Classes", lines=20, max_lines=30)
1912
  inherit_btn.click(fn=find_classes_inheriting_from, inputs=base_class_input, outputs=inherit_output)
1913
+ gr.Markdown(_tool_doc_md(find_classes_inheriting_from))
1914
 
1915
  with gr.Tab("πŸ“– Context"):
1916
  gr.Markdown("### Get Chunk Context")
 
1921
  with gr.Column():
1922
  context_output = gr.Textbox(label="Context", lines=25, max_lines=40)
1923
  context_btn.click(fn=get_chunk_context, inputs=chunk_id_input, outputs=context_output)
1924
+ gr.Markdown(_tool_doc_md(get_chunk_context))
1925
 
1926
  gr.Markdown("---")
1927
  gr.Markdown("### Concept Overview")
 
1932
  with gr.Column():
1933
  concept_output = gr.Textbox(label="Concept Overview", lines=25, max_lines=40)
1934
  concept_btn.click(fn=get_concept_overview, inputs=concept_input, outputs=concept_output)
1935
+ gr.Markdown(_tool_doc_md(get_concept_overview))
1936
+
1937
+ gr.Markdown("---")
1938
+ gr.Markdown("### Get Subgraph")
1939
+ with gr.Row():
1940
+ with gr.Column():
1941
+ subgraph_node = gr.Textbox(label="Center Node ID", placeholder="Enter node ID...")
1942
+ subgraph_depth = gr.Slider(1, 5, value=2, step=1, label="Depth")
1943
+ subgraph_edge_types = gr.Textbox(label="Edge Types (comma-separated, optional)", placeholder="e.g., calls,contains")
1944
+ subgraph_btn = gr.Button("Extract Subgraph", variant="primary")
1945
+ with gr.Column():
1946
+ subgraph_output = gr.Textbox(label="Subgraph", lines=20, max_lines=30)
1947
+ subgraph_btn.click(fn=get_subgraph, inputs=[subgraph_node, subgraph_depth, subgraph_edge_types], outputs=subgraph_output)
1948
+ gr.Markdown(_tool_doc_md(get_subgraph))
1949
+
1950
+ with gr.Tab("πŸ“ Files"):
1951
+ gr.Markdown("### List Files in Directory")
1952
+ with gr.Row():
1953
+ with gr.Column():
1954
+ dir_path = gr.Textbox(label="Directory Path (empty for root)", placeholder="e.g., src/")
1955
+ file_pattern = gr.Textbox(label="Pattern", value="*", placeholder="e.g., *.py")
1956
+ file_recursive = gr.Checkbox(label="Recursive", value=True)
1957
+ file_limit = gr.Slider(10, 100, value=50, step=10, label="Max Results")
1958
+ list_files_btn = gr.Button("List Files", variant="primary")
1959
+ with gr.Column():
1960
+ list_files_output = gr.Textbox(label="Files", lines=20, max_lines=30)
1961
+ list_files_btn.click(fn=list_files_in_directory, inputs=[dir_path, file_pattern, file_recursive, file_limit], outputs=list_files_output)
1962
+ gr.Markdown(_tool_doc_md(list_files_in_directory))
1963
+
1964
+ gr.Markdown("---")
1965
+ gr.Markdown("### Find Files Importing")
1966
+ with gr.Row():
1967
+ with gr.Column():
1968
+ import_module = gr.Textbox(label="Module/Entity Name", placeholder="e.g., torch, numpy...")
1969
+ import_limit = gr.Slider(10, 50, value=30, step=5, label="Max Results")
1970
+ find_imports_btn = gr.Button("Find Files", variant="primary")
1971
+ with gr.Column():
1972
+ find_imports_output = gr.Textbox(label="Importing Files", lines=20, max_lines=30)
1973
+ find_imports_btn.click(fn=find_files_importing, inputs=[import_module, import_limit], outputs=find_imports_output)
1974
+ gr.Markdown(_tool_doc_md(find_files_importing))
1975
+
1976
+ gr.Markdown("---")
1977
+ gr.Markdown("### Get File Stats")
1978
+ with gr.Row():
1979
+ with gr.Column():
1980
+ stats_path = gr.Textbox(label="File Path", placeholder="Enter file path...")
1981
+ stats_btn = gr.Button("Get Stats", variant="primary")
1982
+ with gr.Column():
1983
+ stats_output = gr.Textbox(label="Statistics", lines=20, max_lines=30)
1984
+ stats_btn.click(fn=get_file_stats, inputs=stats_path, outputs=stats_output)
1985
+ gr.Markdown(_tool_doc_md(get_file_stats))
1986
+
1987
+ with gr.Tab("πŸ” Analysis"):
1988
+ gr.Markdown("### Diff Chunks")
1989
+ with gr.Row():
1990
+ with gr.Column():
1991
+ diff_node1 = gr.Textbox(label="First Node ID", placeholder="Enter first node ID...")
1992
+ diff_node2 = gr.Textbox(label="Second Node ID", placeholder="Enter second node ID...")
1993
+ diff_btn = gr.Button("Show Diff", variant="primary")
1994
+ with gr.Column():
1995
+ diff_output = gr.Textbox(label="Diff Output", lines=25, max_lines=40)
1996
+ diff_btn.click(fn=diff_chunks, inputs=[diff_node1, diff_node2], outputs=diff_output)
1997
+ gr.Markdown(_tool_doc_md(diff_chunks))
1998
 
1999
  return demo
2000