|
|
""" |
|
|
Simplified Gradio MCP Server for Knowledge Graphs loaded from HuggingFace datasets. |
|
|
""" |
|
|
import argparse
import difflib
import fnmatch
import functools
import os
import re
import sys
from typing import Optional, List

import gradio as gr

from RepoKnowledgeGraphLib.utils.chunk_utils import (
    organize_chunks_by_file_name, join_organized_chunks
)
|
|
|
|
|
|
|
|
# Optional Langfuse tracing. If the SDK is importable and the credentials in
# the environment authenticate, @observe-decorated tools are traced; otherwise
# we install a no-op `observe` so the rest of the module can use the decorator
# unconditionally.
try:
    from langfuse import get_client, observe

    langfuse = get_client()
    # auth_check() validates the configured credentials against the backend.
    LANGFUSE_ENABLED = langfuse.auth_check()
    if LANGFUSE_ENABLED:
        print("✓ Langfuse client is authenticated and ready!")
    else:
        print("⚠️ Langfuse authentication failed. Tracing disabled.")
except Exception as e:
    # Import failure, missing config, or client construction error:
    # fall back to no tracing at all.
    print(f"⚠️ Langfuse not available: {e}. Tracing disabled.")
    LANGFUSE_ENABLED = False

    def observe(*args, **kwargs):
        # No-op stand-in matching langfuse's decorator-factory call shape:
        # observe(...) returns a decorator that returns the function unchanged.
        def decorator(func):
            return func
        return decorator
|
|
|
|
|
|
|
|
def _sanitize_value(v): |
|
|
if isinstance(v, str): |
|
|
return v.strip() |
|
|
if isinstance(v, dict): |
|
|
return {k: _sanitize_value(val) for k, val in v.items()} |
|
|
if isinstance(v, (list, tuple)): |
|
|
t = type(v) |
|
|
return t(_sanitize_value(x) for x in v) |
|
|
return v |
|
|
|
|
|
|
|
|
def sanitize_inputs(func): |
|
|
"""Decorator that trims whitespace from all string args/kwargs before calling func.""" |
|
|
def wrapper(*args, **kwargs): |
|
|
new_args = tuple(_sanitize_value(a) for a in args) |
|
|
new_kwargs = {k: _sanitize_value(v) for k, v in kwargs.items()} |
|
|
return func(*new_args, **new_kwargs) |
|
|
|
|
|
try: |
|
|
wrapper.__name__ = func.__name__ |
|
|
wrapper.__doc__ = func.__doc__ |
|
|
except Exception: |
|
|
pass |
|
|
return wrapper |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Compose the (real or no-op) `observe` decorator with input sanitization, so
# every tool registered through @observe first has its string arguments
# trimmed before being traced and executed.
try:
    _original_observe = observe

    def _observe_with_sanitize(*deco_args, **deco_kwargs):
        """Drop-in replacement for `observe` that trims string inputs first."""
        def _apply(func):
            sanitized = sanitize_inputs(func)
            return _original_observe(*deco_args, **deco_kwargs)(sanitized)
        return _apply

    observe = _observe_with_sanitize
except Exception:
    # Best effort: if re-wrapping fails for any reason, keep the original
    # decorator rather than breaking module import.
    pass
|
|
|
|
|
|
|
|
# Make the bundled RepoKnowledgeGraphLib package importable when this script
# is run directly from a repository checkout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'RepoKnowledgeGraphLib'))

from RepoKnowledgeGraphLib.RepoKnowledgeGraph import RepoKnowledgeGraph
|
|
|
|
|
|
|
|
# Module-level singleton graph; populated by initialize_knowledge_graph()
# and read by every tool function below (None until initialization).
knowledge_graph = None
|
|
|
|
|
|
|
|
def initialize_knowledge_graph(
    hf_dataset: str,
    hf_token: Optional[str] = None,
    index_nodes: bool = True,
    code_index_kwargs: Optional[dict] = None,
    model_service_kwargs: Optional[dict] = None,
):
    """Initialize the global knowledge graph from a HuggingFace dataset.

    Args:
        hf_dataset: HuggingFace dataset repo id to load the graph from.
        hf_token: Optional HuggingFace auth token for private datasets.
        index_nodes: Whether to build the semantic code index over the nodes.
        code_index_kwargs: Optional extra kwargs forwarded to the code index.
        model_service_kwargs: Optional embedding-backend configuration;
            defaults to the sentence-transformers SFR code embedder that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The loaded RepoKnowledgeGraph instance (also stored in the module
        global `knowledge_graph`).
    """
    global knowledge_graph

    if model_service_kwargs is None:
        # Default embedding backend, used unless the caller overrides it.
        model_service_kwargs = {
            "embedder_type": "sentence-transformers",
            "embed_model_name": "Salesforce/SFR-Embedding-Code-400M_R",
        }

    print(f"Loading knowledge graph from HuggingFace dataset: {hf_dataset}")
    knowledge_graph = RepoKnowledgeGraph.from_hf_dataset(
        repo_id=hf_dataset,
        index_nodes=index_nodes,
        model_service_kwargs=model_service_kwargs,
        code_index_kwargs=code_index_kwargs,
        token=hf_token,
    )
    return knowledge_graph
|
|
|
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_node_info(node_id: str) -> str:
    """
    Retrieve comprehensive details about any node in the Transformers library knowledge graph.

    PURPOSE:
    Use this tool to inspect the full metadata and content of a specific node when you need
    to understand what a particular code element contains, what entities it declares or calls,
    and how it fits into the codebase structure.

    WHEN TO USE:
    - After finding a node ID from search_nodes, list_nodes_by_type, or get_neighbors
    - To see the actual code content of a chunk node
    - To understand what entities (classes, functions, variables) are declared in a file or chunk
    - To examine entity metadata including aliases, declaration locations, and usage locations
    - To get file metadata like language and path information

    NODE TYPES SUPPORTED:
    - 'chunk': Code segments with content, declared/called entities, and file position
    - 'file': Source files with path, language, and entity summaries
    - 'directory': Folder nodes with path information
    - 'entity': Programming constructs (classes, functions, methods, variables) with declaration/usage tracking
    - 'repo': Repository root node

    TYPICAL WORKFLOW:
    1. search_nodes("attention mechanism") -> get node IDs
    2. get_node_info(node_id) -> see full content and metadata
    3. get_neighbors(node_id) or find_usages(entity_name) -> explore relationships

    Args:
        node_id: The unique identifier of the node (e.g., 'src/transformers/models/bert/modeling_bert.py::chunk_3' for chunks, or 'BertModel' for entities)

    Returns:
        str: Formatted details including node type, name, description, content (for chunks), declared entities, called entities, and type-specific metadata

    Example node_ids:
    - Chunk: 'src/transformers/models/bert/modeling_bert.py::chunk_5'
    - File: 'src/transformers/models/bert/modeling_bert.py'
    - Entity: 'BertModel', 'forward', 'attention_mask'
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if node_id not in knowledge_graph.graph:
            return f"Error: Node '{node_id}' not found in knowledge graph"

        # The node payload lives under the 'data' attribute of each networkx
        # node; its concrete class varies with the node type.
        node = knowledge_graph.graph.nodes[node_id]['data']
        node_type = getattr(node, 'node_type', 'Unknown')
        node_class = node.__class__.__name__
        node_name = getattr(node, 'name', 'Unknown')
        description = getattr(node, 'description', None)

        result = f"Node Information:\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        result += f"Node ID: {node_id}\nClass: {node_class}\nName: {node_name}\nType: {node_type}\n"
        result += f"Description: {description or 'N/A'}\n"

        # Entity nodes report declaration/usage tracking; all other node kinds
        # report declared/called entity lists plus type-specific metadata.
        if node_class == 'EntityNode' or node_type == 'entity':
            entity_type = getattr(node, 'entity_type', 'Unknown')
            declaring_chunk_ids = getattr(node, 'declaring_chunk_ids', [])
            calling_chunk_ids = getattr(node, 'calling_chunk_ids', [])
            aliases = getattr(node, 'aliases', [])

            result += f"\nEntity Type: {entity_type}\n"
            result += f"Aliases: {', '.join(aliases) if aliases else 'None'}\n"
            result += f"Declared in {len(declaring_chunk_ids)} chunk(s):\n"
            for cid in declaring_chunk_ids[:5]:  # cap the listing at 5 chunks
                result += f" - {cid}\n"
            if len(declaring_chunk_ids) > 5:
                result += f" ... and {len(declaring_chunk_ids) - 5} more\n"
            result += f"Called in {len(calling_chunk_ids)} chunk(s):\n"
            for cid in calling_chunk_ids[:5]:
                result += f" - {cid}\n"
            if len(calling_chunk_ids) > 5:
                result += f" ... and {len(calling_chunk_ids) - 5} more\n"
            result += f"\nSummary: Entity {node_id} ({node_name}) — {entity_type} declared in {len(declaring_chunk_ids)} chunk(s) and called in {len(calling_chunk_ids)} chunk(s).\n"
        else:
            declared_entities = getattr(node, 'declared_entities', [])
            called_entities = getattr(node, 'called_entities', [])

            result += f"\nDeclared Entities ({len(declared_entities)}):\n"
            for entity in declared_entities[:10]:  # cap the listing at 10 entities
                result += f" - {entity}\n"
            if len(declared_entities) > 10:
                result += f" ... and {len(declared_entities) - 10} more\n"

            result += f"\nCalled Entities ({len(called_entities)}):\n"
            for entity in called_entities[:10]:
                result += f" - {entity}\n"
            if len(called_entities) > 10:
                result += f" ... and {len(called_entities) - 10} more\n"

            # File and chunk nodes additionally carry content/path metadata;
            # directory nodes carry only a path.
            if node_type in ['file', 'chunk']:
                content = getattr(node, 'content', None)
                result += f"\nContent:\n{content or 'N/A'}\n"
                if hasattr(node, 'path'):
                    result += f"Path: {node.path}\n"
                if hasattr(node, 'language'):
                    result += f"Language: {node.language}\n"
                if node_type == 'chunk' and hasattr(node, 'order_in_file'):
                    result += f"Order in File: {node.order_in_file}\n"
            elif node_type == 'directory':
                if hasattr(node, 'path'):
                    result += f"Path: {node.path}\n"

            result += f"\nSummary: Node {node_id} ({node_name}) — {node_type} with {len(declared_entities)} declared and {len(called_entities)} called entities.\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_node_edges(node_id: str) -> str:
    """
    List every graph edge (relationship) attached to a node, split by direction.

    Reports incoming edges (nodes pointing TO `node_id`, e.g. "who calls me?")
    and outgoing edges (nodes `node_id` points to, e.g. "what do I contain?").
    Relations seen here are 'contains', 'calls' and 'declares'. Prefer
    get_neighbors when you want details about the connected nodes themselves;
    this tool gives the raw relationship view.

    Args:
        node_id: The unique identifier of the node to inspect edges for

    Returns:
        str: Incoming and outgoing edges with source/target node IDs and
            relationship types, truncated at 20 per direction.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if node_id not in knowledge_graph.graph:
            return f"Error: Node '{node_id}' not found in knowledge graph"

        graph = knowledge_graph.graph

        def _records(edge_iter):
            # Normalize networkx (src, tgt, attrs) tuples into plain dicts.
            return [
                {"source": u, "target": w, "relation": attrs.get("relation", "?")}
                for u, w, attrs in edge_iter
            ]

        incoming = _records(graph.in_edges(node_id, data=True))
        outgoing = _records(graph.out_edges(node_id, data=True))

        header = [
            f"Node Edges for '{node_id}':",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
            "",
            f"Incoming Edges ({len(incoming)}):",
        ]
        result = "\n".join(header) + "\n"
        for edge in incoming[:20]:
            result += f" ← {edge['source']} [{edge['relation']}]\n"
        if len(incoming) > 20:
            result += f" ... and {len(incoming) - 20} more\n"

        result += f"\nOutgoing Edges ({len(outgoing)}):\n"
        for edge in outgoing[:20]:
            result += f" → {edge['target']} [{edge['relation']}]\n"
        if len(outgoing) > 20:
            result += f" ... and {len(outgoing) - 20} more\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def search_nodes(query: str, limit: int = 10, page: int = 1) -> str:
    """
    Keyword search over the indexed code of the Transformers knowledge graph.

    This is the primary search tool: it matches natural-language queries,
    function/class names, comments or code snippets against chunk content and
    returns a relevance-ranked, paginated list of matching chunk IDs with
    content previews. Feed the returned IDs into get_node_info or
    get_chunk_context for full details, then go_to_definition / find_usages
    to explore relationships. Specific technical terms yield better results.

    Args:
        query: Search terms (natural language, identifiers, or code snippets).
        limit: Results per page (default: 10, max recommended: 50).
        page: Page number starting from 1 for browsing large result sets.

    Returns:
        str: Ranked matches with IDs and content previews, or an error message.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients sometimes pass numbers as strings; coerce before validating.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"
        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        # Fetch enough hits from the index to cover every page up to the
        # requested one, then slice out the requested window.
        results = knowledge_graph.code_index.query(query, n_results=limit * page)
        metadatas = results.get("metadatas", [[]])[0]
        if not metadatas:
            return f"No results found for '{query}'."

        total = len(metadatas)
        total_pages = (total + limit - 1) // limit
        if page > total_pages:
            return (
                f"Error: Page {page} does not exist. "
                f"Total pages: {total_pages} (with {total} results at {limit} per page)"
            )

        first = (page - 1) * limit
        window = metadatas[first:first + limit]

        parts = [
            f"Search Results for '{query}' (Page {page}/{total_pages}, {total} total):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]
        for rank, meta in enumerate(window, start=first + 1):
            entry = f"{rank}. ID: {meta.get('id', 'N/A')}\n"
            preview = meta.get('content', '')
            if preview:
                entry += f" Content: {preview}\n"
            parts.append(entry + "\n")

        if page < total_pages:
            parts.append(f"Use page={page + 1} to see the next page\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
@observe(as_type="tool") |
|
|
def get_graph_stats() -> str: |
|
|
""" |
|
|
Get a comprehensive statistical overview of the Transformers library knowledge graph. |
|
|
|
|
|
PURPOSE: |
|
|
Use this tool to understand the scope and structure of the knowledge graph. |
|
|
Provides counts and breakdowns of all node types, entity types, and relationship types. |
|
|
|
|
|
WHEN TO USE: |
|
|
- At the START of an exploration session to understand the codebase scope |
|
|
- To learn what types of entities and relationships are available for querying |
|
|
- To understand the terminology used in this knowledge graph (chunks, entities, edges) |
|
|
- When you need to report on the overall structure of the Transformers library |
|
|
|
|
|
WHAT YOU'LL LEARN: |
|
|
- Total number of nodes and edges in the graph |
|
|
- Breakdown of node types (chunks, files, directories, entities) |
|
|
- Entity type distribution (classes, functions, methods, variables, etc.) |
|
|
- Edge relationship types (contains, calls, declares) |
|
|
- Definitions of key concepts used throughout the tools |
|
|
|
|
|
GRAPH TERMINOLOGY: |
|
|
- Chunks: Logical code segments (a function body, a class definition, etc.) |
|
|
- Entities: Named programming constructs tracked across the codebase |
|
|
- Edges: Relationships connecting nodes (contains, calls, declares) |
|
|
|
|
|
Returns: |
|
|
str: Detailed statistics including node counts by type, entity breakdown, edge relation counts, and concept definitions to help you use other tools effectively. |
|
|
""" |
|
|
if knowledge_graph is None: |
|
|
return "Error: Knowledge graph not initialized" |
|
|
|
|
|
try: |
|
|
g = knowledge_graph.graph |
|
|
num_nodes = g.number_of_nodes() |
|
|
num_edges = g.number_of_edges() |
|
|
|
|
|
|
|
|
node_types = {} |
|
|
entity_breakdown = {} |
|
|
|
|
|
for _, node_attrs in g.nodes(data=True): |
|
|
node_type = getattr(node_attrs['data'], 'node_type', 'Unknown') |
|
|
node_types[node_type] = node_types.get(node_type, 0) + 1 |
|
|
|
|
|
|
|
|
if node_type == 'entity': |
|
|
entity_type = getattr(node_attrs['data'], 'entity_type', 'Unknown') |
|
|
|
|
|
|
|
|
if not entity_type: |
|
|
node_id = node_attrs['data'].id if hasattr(node_attrs['data'], 'id') else None |
|
|
if node_id and node_id in knowledge_graph.entities: |
|
|
entity_types = knowledge_graph.entities[node_id].get('type', []) |
|
|
entity_type = entity_types[0] if entity_types else 'Unknown' |
|
|
|
|
|
entity_breakdown[entity_type] = entity_breakdown.get(entity_type, 0) + 1 |
|
|
|
|
|
|
|
|
edge_relations = {} |
|
|
for _, _, attrs in g.edges(data=True): |
|
|
relation = attrs.get('relation', 'Unknown') |
|
|
edge_relations[relation] = edge_relations.get(relation, 0) + 1 |
|
|
|
|
|
|
|
|
result = f"""Knowledge Graph Statistics: |
|
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ |
|
|
|
|
|
📊 Overview: |
|
|
Total Nodes: {num_nodes:,} |
|
|
Total Edges: {num_edges:,} |
|
|
|
|
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ |
|
|
|
|
|
📦 Node Types: |
|
|
""" |
|
|
|
|
|
|
|
|
for ntype, count in sorted(node_types.items(), key=lambda x: x[1], reverse=True): |
|
|
result += f" • {ntype}: {count:,}\n" |
|
|
|
|
|
|
|
|
if ntype == 'entity' and entity_breakdown: |
|
|
result += f" └─ Entity Breakdown:\n" |
|
|
for etype, ecount in sorted(entity_breakdown.items(), key=lambda x: x[1], reverse=True): |
|
|
percentage = (ecount / count * 100) if count > 0 else 0 |
|
|
result += f" ├─ {etype}: {ecount:,} ({percentage:.1f}%)\n" |
|
|
|
|
|
result += f""" |
|
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ |
|
|
|
|
|
🔗 Edge Relations: |
|
|
""" |
|
|
for relation, count in sorted(edge_relations.items(), key=lambda x: x[1], reverse=True): |
|
|
result += f" • {relation}: {count:,}\n" |
|
|
|
|
|
|
|
|
result += f""" |
|
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ |
|
|
|
|
|
ℹ️ Definitions: |
|
|
|
|
|
Chunks: Code segments representing logical portions of files. Each chunk |
|
|
contains a section of code (like a function, class, or code block) |
|
|
along with metadata about what entities it declares and calls. |
|
|
|
|
|
Entities: Programming constructs identified in the code including: |
|
|
- Classes: Class definitions |
|
|
- Functions: Function definitions |
|
|
- Methods: Class method definitions |
|
|
- Variables: Variable declarations |
|
|
- Parameters: Function/method parameters |
|
|
- Function_call/Method_call: Usage references |
|
|
|
|
|
Files: Source code files in the repository |
|
|
Directories: Folder structure containing files |
|
|
Repo: Root repository node |
|
|
|
|
|
Edge Relations: |
|
|
- contains: Parent-child relationships (file contains chunks) |
|
|
- declares: Entity declaration relationships |
|
|
- calls: Entity usage/invocation relationships |
|
|
""" |
|
|
|
|
|
return result |
|
|
except Exception as e: |
|
|
return f"Error: {str(e)}" |
|
|
|
|
|
@observe(as_type="tool") |
|
|
@observe(as_type="tool")
def list_nodes_by_type(node_type: str, limit: int = 20, page: int = 1) -> str:
    """
    List all nodes of a specific type in the Transformers knowledge graph with pagination.

    PURPOSE:
    Use this tool to browse and discover nodes by their type. Helpful when you want to
    see what classes, functions, files, or other constructs exist in the codebase.

    WHEN TO USE:
    - To get a list of all classes in the Transformers library: node_type='class'
    - To see all Python files: node_type='file'
    - To list all functions: node_type='function'
    - To browse all methods: node_type='method'
    - When you need to find node IDs for further exploration

    VALID node_type VALUES:
    For entities (programming constructs):
    - 'class': Class definitions (e.g., BertModel, GPT2LMHeadModel)
    - 'function': Standalone function definitions
    - 'method': Class method definitions
    - 'variable': Variable declarations
    - 'parameter': Function/method parameters

    For structural nodes:
    - 'file': Source code files
    - 'chunk': Code segments within files
    - 'directory': Folder structure nodes
    - 'repo': Repository root (typically one)

    COMPARISON WITH search_by_type_and_name:
    - list_nodes_by_type: Browse ALL nodes of a type (no name filter)
    - search_by_type_and_name: Filter by type AND search by name substring

    Args:
        node_type: The type to filter by. Use lowercase: 'class', 'function', 'method', 'file', 'chunk', 'directory'
        limit: Maximum results per page (default: 20). Increase for broader browsing.
        page: Page number starting from 1 for pagination through large result sets.

    Returns:
        str: Alphabetically sorted list of matching nodes with their IDs and types. Use IDs with get_node_info for details.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients sometimes pass numbers as strings; coerce before validating.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"
        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        g = knowledge_graph.graph
        matching_nodes = []

        for node_id, data in g.nodes(data=True):
            node = data['data']
            current_node_type = getattr(node, 'node_type', None)
            node_name = getattr(node, 'name', 'Unknown')

            # Entity nodes match on their entity_type (case-insensitive);
            # structural nodes match on node_type exactly.
            if current_node_type == 'entity':
                entity_type = getattr(node, 'entity_type', '')

                # Fallback to the entity registry when the node itself carries
                # no entity_type (NOTE(review): presumably older graph formats
                # — confirm against RepoKnowledgeGraph).
                if not entity_type and node_id in knowledge_graph.entities:
                    entity_types = knowledge_graph.entities[node_id].get('type', [])
                    entity_type = entity_types[0] if entity_types else ''

                if entity_type and entity_type.lower() == node_type.lower():
                    matching_nodes.append({
                        "id": node_id,
                        "name": node_name,
                        "type": f"entity ({entity_type})"
                    })

            elif current_node_type == node_type:
                matching_nodes.append({
                    "id": node_id,
                    "name": node_name,
                    "type": current_node_type
                })

        # Alphabetical, case-insensitive ordering gives stable pagination.
        matching_nodes.sort(key=lambda x: x['name'].lower())

        total = len(matching_nodes)
        if total == 0:
            return f"No nodes found of type '{node_type}'."

        total_pages = (total + limit - 1) // limit
        if page > total_pages:
            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} nodes at {limit} per page)"

        start_idx = (page - 1) * limit
        end_idx = start_idx + limit
        page_slice = matching_nodes[start_idx:end_idx]

        result = f"Nodes of type '{node_type}' (Page {page}/{total_pages}, {total} total):\n"
        result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        for i, node in enumerate(page_slice, start=start_idx + 1):
            result += f"{i}. {node['name']}\n"
            result += f" ID: {node['id']}\n"
            result += f" Type: {node['type']}\n\n"

        if page < total_pages:
            result += f"Use page={page + 1} to see the next page\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_neighbors(node_id: str, limit: int = 20, page: int = 1) -> str:
    """
    List every node directly connected to `node_id`, with relationship info.

    Shows each neighbor's ID, name and node type plus the direction and type
    of the edge linking it to `node_id` (→ outgoing, ← incoming; relations are
    'contains', 'calls', 'declares'). Useful for navigating from a file to its
    chunks, from a chunk to the entities it declares/calls, or from an entity
    to the chunks referencing it. Use get_node_edges instead for a raw edge
    view with metadata only.

    Args:
        node_id: The ID of the node to explore neighbors for
        limit: Maximum neighbors to return per page (default: 20)
        page: 1-based page number for nodes with many connections

    Returns:
        str: Connected nodes with their IDs, names, types and the relationships
            connecting them.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if node_id not in knowledge_graph.graph:
            return f"Error: Node '{node_id}' not found in knowledge graph"

        # MCP clients sometimes pass numbers as strings; coerce before validating.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"
        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        neighbors = knowledge_graph.get_neighbors(node_id)
        if not neighbors:
            return f"No neighbors found for node '{node_id}'"

        total = len(neighbors)
        total_pages = (total + limit - 1) // limit
        if page > total_pages:
            return (
                f"Error: Page {page} does not exist. "
                f"Total pages: {total_pages} (with {total} neighbors at {limit} per page)"
            )

        first = (page - 1) * limit
        graph = knowledge_graph.graph
        parts = [
            f"Neighbors of '{node_id}' (Page {page}/{total_pages}, {total} total):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]
        for idx, nb in enumerate(neighbors[first:first + limit], start=first + 1):
            parts.append(f"{idx}. {nb.id}\n")
            parts.append(f" Name: {getattr(nb, 'name', 'Unknown')}\n")
            parts.append(f" Type: {nb.node_type}\n")
            # Report the connecting edge from whichever direction it exists.
            if graph.has_edge(node_id, nb.id):
                attrs = graph.get_edge_data(node_id, nb.id)
                parts.append(f" → Relation: {attrs.get('relation', 'Unknown')}\n")
            elif graph.has_edge(nb.id, node_id):
                attrs = graph.get_edge_data(nb.id, node_id)
                parts.append(f" ← Relation: {attrs.get('relation', 'Unknown')}\n")
            parts.append("\n")

        if page < total_pages:
            parts.append(f"Use page={page + 1} to see the next page\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def go_to_definition(entity_name: str) -> str:
    """
    Show the source location(s) and code where an entity is defined/declared.

    Given the exact (case-sensitive) name of a class, function, method or
    variable, returns its entity type, optional data type, and — for up to
    five declaring chunks — the file path, chunk order and full code content
    of the definition. Complements find_usages, which lists where the entity
    is consumed rather than where it is implemented.

    Args:
        entity_name: Exact entity name, e.g. 'BertModel', 'forward',
            'attention_mask', 'get_extended_attention_mask'

    Returns:
        str: Entity type, declaration locations and complete definition
            source, or an error message when the entity is unknown.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if entity_name not in knowledge_graph.entities:
            return f"Error: Entity '{entity_name}' not found in knowledge graph"

        info = knowledge_graph.entities[entity_name]
        declaring = info.get('declaring_chunk_ids', [])
        if not declaring:
            return f"Entity '{entity_name}' found but no declarations identified."

        pieces = [
            f"Definition(s) for '{entity_name}':\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
            f"Type: {', '.join(info.get('type', ['Unknown']))}\n",
        ]
        if info.get('dtype'):
            pieces.append(f"Data Type: {info['dtype']}\n")
        pieces.append(f"\nDeclared in {len(declaring)} location(s):\n\n")

        # Cap the listing at five declaration sites to keep the output bounded.
        for pos, chunk_id in enumerate(declaring[:5], 1):
            if chunk_id not in knowledge_graph.graph:
                continue  # stale registry entry: chunk id missing from the graph
            chunk = knowledge_graph.graph.nodes[chunk_id]['data']
            pieces.append(f"{pos}. Chunk: {chunk_id}\n")
            pieces.append(f" File: {chunk.path}\n")
            pieces.append(f" Order: {chunk.order_in_file}\n")
            pieces.append(f" Content:\n{chunk.content}\n\n")

        if len(declaring) > 5:
            pieces.append(f"... and {len(declaring) - 5} more locations\n")

        return "".join(pieces)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def find_usages(entity_name: str, limit: int = 20, page: int = 1) -> str:
    """
    Find all locations in the codebase where an entity is used or called.

    Lists every chunk that references the named entity (instantiation, call,
    or variable reference), with file paths and the full code context of each
    usage. Results are paginated. Complements go_to_definition, which shows
    where an entity is DEFINED rather than where it is USED.

    Args:
        entity_name: Exact name of the entity to find usages for (case-sensitive)
        limit: Usages per page (default: 20). Many popular classes have 100+ usages.
        page: Page number for pagination (starts at 1)

    Returns:
        str: List of code chunks that use this entity, with file paths and
        full code content showing the usage in context
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may deliver numeric parameters as strings; coerce first.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if entity_name not in knowledge_graph.entities:
            return f"Error: Entity '{entity_name}' not found in knowledge graph"

        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        usage_ids = knowledge_graph.entities[entity_name].get('calling_chunk_ids', [])

        if not usage_ids:
            return f"Entity '{entity_name}' found but no usages identified."

        total = len(usage_ids)
        total_pages = (total + limit - 1) // limit
        if page > total_pages:
            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} usages at {limit} per page)"

        first = (page - 1) * limit

        # Accumulate output pieces and join once at the end.
        pieces = [
            f"Usages of '{entity_name}' (Page {page}/{total_pages}, {total} total):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]

        for pos, cid in enumerate(usage_ids[first:first + limit], start=first + 1):
            if cid not in knowledge_graph.graph:
                # Chunk id with no backing node in the graph; nothing to show.
                continue
            node = knowledge_graph.graph.nodes[cid]['data']
            pieces.append(f"{pos}. {node.path} (chunk {node.order_in_file})\n")
            pieces.append(f" Content:\n{node.content}\n\n")

        if page < total_pages:
            pieces.append(f"Use page={page + 1} to see the next page\n")

        return "".join(pieces)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_file_structure(file_path: str) -> str:
    """
    Get a structural overview of a source file showing its chunks and declared entities.

    Produces a table of contents for one file: detected language, number of
    code chunks, the entities (classes, functions, methods, variables) the
    file declares, and an ordered list of chunk IDs with short descriptions.
    Chunk IDs can then be fed to get_node_info or get_chunk_context.

    Args:
        file_path: The full path to the file (e.g., 'src/transformers/models/bert/modeling_bert.py').
            Must match exactly as stored in the knowledge graph.

    Returns:
        str: File overview including language, chunk count, declared entities
        list, and chunk descriptions
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if file_path not in knowledge_graph.graph:
            return f"Error: File '{file_path}' not found in knowledge graph"

        node = knowledge_graph.graph.nodes[file_path]['data']
        file_chunks = knowledge_graph.get_chunks_of_file(file_path)

        lines = [
            f"File Structure: {node.name}\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
            f"Path: {file_path}\n",
            f"Language: {getattr(node, 'language', 'Unknown')}\n",
            f"Total Chunks: {len(file_chunks)}\n\n",
        ]

        # Entities may be dicts (name/type) or plain strings depending on loader.
        declared = getattr(node, 'declared_entities', None)
        if declared:
            lines.append(f"Declared Entities ({len(declared)}):\n")
            for entity in declared[:15]:
                if isinstance(entity, dict):
                    lines.append(f" - {entity.get('name', '?')} ({entity.get('type', '?')})\n")
                else:
                    lines.append(f" - {entity}\n")
            if len(declared) > 15:
                lines.append(f" ... and {len(declared) - 15} more\n")

        lines.append("\nChunks:\n")
        for ch in file_chunks[:10]:
            lines.append(f" [{ch.order_in_file}] {ch.id}\n")
            if ch.description:
                # Trim long chunk descriptions for readability.
                shown = ch.description if len(ch.description) <= 80 else ch.description[:80] + "..."
                lines.append(f" {shown}\n")

        if len(file_chunks) > 10:
            lines.append(f" ... and {len(file_chunks) - 10} more chunks\n")

        return "".join(lines)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_related_chunks(chunk_id: str, relation_type: str = "calls", limit: int = 20, page: int = 1) -> str:
    """
    Find code chunks connected to a given chunk through a specific relationship type.

    Follows outgoing edges from the chunk, optionally filtered by the edge's
    relation type, and lists the target nodes with their file paths and any
    entity names attached to the edges. Results are paginated.

    RELATIONSHIP TYPES:
    - 'calls': Entities/chunks that this chunk calls or references (most common)
    - 'contains': Child nodes contained by this node (for files/directories)
    - 'declares': Entities declared by this chunk
    - 'all' or '': Get all outgoing relationships regardless of type

    Args:
        chunk_id: The ID of the chunk to explore from (e.g., 'src/transformers/models/bert/modeling_bert.py::chunk_5')
        relation_type: Filter by relationship type: 'calls', 'contains', 'declares', or 'all' for everything (default: 'calls')
        limit: Maximum results per page (default: 20)
        page: Page number for pagination

    Returns:
        str: List of related chunks with their IDs, file paths, and entity
        names involved in the relationship
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if chunk_id not in knowledge_graph.graph:
            return f"Error: Chunk '{chunk_id}' not found in knowledge graph"

        # MCP clients may deliver numeric parameters as strings; coerce first.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        # Single pass over outgoing edges; '' or 'all' disables the relation
        # filter. (Previously two near-identical loops duplicated this logic.)
        match_all = relation_type in ("", "all")
        related = []
        for _, target, attrs in knowledge_graph.graph.out_edges(chunk_id, data=True):
            if not match_all and attrs.get('relation') != relation_type:
                continue
            target_node = knowledge_graph.graph.nodes[target]['data']
            related.append({
                "id": target,
                "file_path": getattr(target_node, 'path', 'Unknown'),
                "entity_name": attrs.get('entity_name')
            })

        if not related:
            return f"No chunks found with '{relation_type}' relationship from '{chunk_id}'"

        total = len(related)
        total_pages = (total + limit - 1) // limit
        if page > total_pages:
            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} results at {limit} per page)"

        start_idx = (page - 1) * limit
        page_slice = related[start_idx:start_idx + limit]

        result = f"Chunks related to '{chunk_id}' via '{relation_type}' (Page {page}/{total_pages}, {total} total):\n"
        result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        for i, chunk in enumerate(page_slice, start=start_idx + 1):
            result += f"{i}. {chunk['id']}\n"
            result += f" File: {chunk['file_path']}\n"
            if chunk['entity_name']:
                result += f" Entity: {chunk['entity_name']}\n"
            result += "\n"

        if page < total_pages:
            result += f"Use page={page + 1} to see the next page\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def list_all_entities(
    limit: int = 50,
    page: int = 1,
    entity_type: Optional[str] = None,
    declared_in_repo: Optional[bool] = None,
    called_in_repo: Optional[bool] = None
) -> str:
    """
    Browse all programming entities (classes, functions, methods, variables) tracked in the knowledge graph.

    Supports filtering by entity type, by whether the entity is declared in
    the repo, and by whether it is ever used. Useful filter combinations:
    - All classes: entity_type='class'
    - Unused functions: entity_type='function', called_in_repo=False
    - External dependencies: declared_in_repo=False, called_in_repo=True

    Args:
        limit: Entities per page (default: 50). Use larger values for comprehensive listings.
        page: Page number starting from 1 for pagination
        entity_type: Filter by type: 'class', 'function', 'method', 'variable', 'parameter', or None for all
        declared_in_repo: True=defined in repo, False=external only, None=all
        called_in_repo: True=has usages, False=never used, None=all

    Returns:
        str: List of entities with their types, declaration count, and usage
        count. Use entity names with go_to_definition or find_usages.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may deliver numeric parameters as strings; coerce first.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        # Fix: a non-positive limit previously reached the pagination math and
        # surfaced as the confusing "Error: division by zero".
        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        if entity_type == "" or entity_type == "null":
            entity_type = None

        def _tristate(value):
            # Map recognized string spellings to True/False/None; anything
            # else passes through unchanged (matches prior behavior).
            if isinstance(value, str):
                lowered = value.lower()
                if lowered in ("true", "1", "yes"):
                    return True
                if lowered in ("false", "0", "no"):
                    return False
                if lowered in ("none", "null", "all", ""):
                    return None
            return value

        declared_in_repo = _tristate(declared_in_repo)
        called_in_repo = _tristate(called_in_repo)

        if not knowledge_graph.entities:
            return "No entities found in the knowledge graph."

        filtered_entities = {}
        for entity_name, info in knowledge_graph.entities.items():
            # Type filter: entity must list the requested type (case-insensitive).
            if entity_type is not None:
                entity_types = [t.lower() if t else '' for t in info.get('type', [])]
                if entity_type.lower() not in entity_types:
                    continue

            # Declaration filter: keep only declared (True) or external (False).
            if declared_in_repo is not None:
                has_declaration = len(info.get('declaring_chunk_ids', [])) > 0
                if declared_in_repo and not has_declaration:
                    continue
                if not declared_in_repo and has_declaration:
                    continue

            # Usage filter: keep only used (True) or never-used (False).
            if called_in_repo is not None:
                has_calls = len(info.get('calling_chunk_ids', [])) > 0
                if called_in_repo and not has_calls:
                    continue
                if not called_in_repo and has_calls:
                    continue

            filtered_entities[entity_name] = info

        if not filtered_entities:
            filter_desc = []
            if entity_type:
                filter_desc.append(f"type={entity_type}")
            if declared_in_repo is not None:
                filter_desc.append(f"declared_in_repo={declared_in_repo}")
            if called_in_repo is not None:
                filter_desc.append(f"called_in_repo={called_in_repo}")
            filter_text = f" (filtered by {', '.join(filter_desc)})" if filter_desc else ""
            return f"No entities found{filter_text}."

        total_entities = len(filtered_entities)
        total_pages = (total_entities + limit - 1) // limit

        if page > total_pages:
            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total_entities} entities at {limit} per page)"

        start_idx = (page - 1) * limit
        end_idx = start_idx + limit
        paginated_items = list(filtered_entities.items())[start_idx:end_idx]

        result = f"All Entities (Page {page}/{total_pages}, {total_entities} total):\n"
        result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        for i, (entity_name, info) in enumerate(paginated_items, start=start_idx + 1):
            result += f"{i}. {entity_name}\n"
            result += f" Types: {', '.join(info.get('type', ['Unknown']))}\n"
            result += f" Declarations: {len(info.get('declaring_chunk_ids', []))}\n"
            result += f" Usages: {len(info.get('calling_chunk_ids', []))}\n\n"

        result += f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        result += f"Showing {start_idx + 1}-{min(end_idx, total_entities)} of {total_entities} entities\n"
        result += f"Page {page} of {total_pages}\n"

        if page < total_pages:
            result += f"Use page={page + 1} to see the next page\n"

        if entity_type:
            result += f"\n(Filtered by type={entity_type})\n"
        if declared_in_repo is not None:
            result += f"(Filtered by declared_in_repo={declared_in_repo})\n"
        if called_in_repo is not None:
            result += f"(Filtered by called_in_repo={called_in_repo})\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def diff_chunks(node_id_1: str, node_id_2: str) -> str:
    """
    Compare two code chunks and show their differences in unified diff format.

    Produces standard unified diff output (like git diff): lines starting
    with '-' appear only in the first chunk, '+' only in the second, and
    '@@' markers give line-number context. Useful for comparing similar
    implementations (e.g., two attention mechanisms) or spotting duplication.

    Args:
        node_id_1: ID of the first chunk/node to compare
        node_id_2: ID of the second chunk/node to compare

    Returns:
        str: Unified diff output showing line-by-line differences. Returns
        'No differences found' if chunks are identical.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # Validate both node IDs before touching their data.
        for nid in (node_id_1, node_id_2):
            if nid not in knowledge_graph.graph:
                return f"Error: Node '{nid}' not found in knowledge graph"

        nodes = knowledge_graph.graph.nodes
        text_a = getattr(nodes[node_id_1]['data'], 'content', None)
        text_b = getattr(nodes[node_id_2]['data'], 'content', None)

        if not text_a or not text_b:
            return "Error: One or both nodes have no content."

        delta = "\n".join(difflib.unified_diff(
            text_a.splitlines(), text_b.splitlines(),
            fromfile=node_id_1, tofile=node_id_2, lineterm=""
        ))

        # An empty diff means the chunk contents are identical.
        return delta if delta else "No differences found between the two chunks."
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def print_tree(root_id: str = "root", max_depth: int = 3) -> str:
    """
    Display a hierarchical tree view of the repository structure starting from any node.

    Shows parent-child relationships in a familiar tree format: each level
    shows the node name and type (repo, directory, file, chunk), with
    indentation for depth. Children per node are capped at 20 to keep the
    output manageable. Start with max_depth=2 for an overview, or pass a
    directory path as root_id to focus on one area.

    Args:
        root_id: Starting node ID. Use 'root' for repository root, or a
            directory/file path to start from a specific location.
        max_depth: How many levels deep to show (default: 3). Higher values
            show more detail but larger output.

    Returns:
        str: ASCII tree visualization showing the hierarchical structure
        with node names and types
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may deliver numeric parameters as strings; coerce first.
        if isinstance(max_depth, str):
            try:
                max_depth = int(max_depth)
            except ValueError:
                return f"Error: 'max_depth' must be an integer, got '{max_depth}'"

        g = knowledge_graph.graph

        if root_id not in g:
            # Fall back to the first structural node found (repo/dir/file).
            roots = [n for n, d in g.nodes(data=True)
                     if getattr(d['data'], 'node_type', None) in ('repo', 'directory', 'file')]
            if roots:
                root_id = roots[0]
            else:
                return f"Error: Node '{root_id}' not found and no suitable root found"

        out = [
            f"Tree View (starting from '{root_id}', max depth: {max_depth}):\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]

        def _walk(node_id, depth):
            # Append lines to `out` and join once at the end; the previous
            # recursive string concatenation was O(n^2) on large trees.
            if depth > max_depth:
                return

            node = g.nodes[node_id]['data']
            name = getattr(node, 'name', node_id)
            node_type = getattr(node, 'node_type', '?')

            out.append(" " * depth + f"- {name} ({node_type})\n")

            children = [t for s, t in g.out_edges(node_id)]
            for child in children[:20]:
                _walk(child, depth + 1)

            if len(children) > 20:
                out.append(" " * (depth + 1) + f"... and {len(children) - 20} more\n")

        _walk(root_id, 0)
        return "".join(out)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def entity_relationships(node_id: str) -> str:
    """
    Display all incoming and outgoing relationships for any node, with relationship types.

    Shows both what points TO this node (e.g., chunks that call it, files
    that contain it) and what this node points TO (e.g., entities it calls,
    chunks it contains), labeling each edge with its relation type
    (calls, declares, contains). Each direction is capped at 20 entries.

    Args:
        node_id: The ID of any node (entity, chunk, file, directory)

    Returns:
        str: Complete list of incoming and outgoing relationships with
        source/target IDs and relationship types
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if node_id not in knowledge_graph.graph:
            return f"Error: Node '{node_id}' not found in knowledge graph"

        g = knowledge_graph.graph
        inbound = list(g.in_edges(node_id, data=True))
        outbound = list(g.out_edges(node_id, data=True))

        parts = [
            f"Relationships for '{node_id}':\n",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        ]

        if inbound:
            parts.append(f"Incoming Relationships ({len(inbound)}):\n")
            for src, _dst, attrs in inbound[:20]:
                parts.append(f" ← {src} [{attrs.get('relation', '?')}]\n")
            if len(inbound) > 20:
                parts.append(f" ... and {len(inbound) - 20} more\n")
            parts.append("\n")

        if outbound:
            parts.append(f"Outgoing Relationships ({len(outbound)}):\n")
            for _src, dst, attrs in outbound[:20]:
                parts.append(f" → {dst} [{attrs.get('relation', '?')}]\n")
            if len(outbound) > 20:
                parts.append(f" ... and {len(outbound) - 20} more\n")

        if not inbound and not outbound:
            parts.append("No relationships found.\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10, page: int = 1, partial_allowed: bool = True) -> str:
    """
    Search for nodes by combining type filtering with name pattern matching.

    Case-insensitive name search restricted to nodes of one type. With
    partial_allowed=True (default) a fuzzy subsequence match is also
    accepted (searching 'Embed' finds 'BertEmbeddings'); with False only
    substring matches count. Results are ranked by match quality: exact
    name matches first, then substring matches, then fuzzy matches.

    VALID node_type VALUES:
    For entities: 'class', 'function', 'method', 'variable', 'parameter'
    For structural: 'file', 'chunk', 'directory'

    Args:
        node_type: Type to filter by: 'class', 'function', 'method', 'file', 'chunk', 'directory', etc.
        name_query: Name pattern to search for (case-insensitive). Can be partial.
        limit: Results per page (default: 10)
        page: Page number for pagination
        partial_allowed: Enable fuzzy matching (default: True). Set False for stricter matching.

    Returns:
        str: Matching nodes sorted by relevance, with IDs and types. Use IDs
        with get_node_info for details.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may deliver numeric/boolean parameters as strings.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if isinstance(partial_allowed, str):
            partial_allowed = partial_allowed.lower() in ('true', '1', 'yes')

        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        g = knowledge_graph.graph
        query_lower = name_query.lower()

        # Fuzzy pattern: query characters in order, anything in between.
        partial_regex = None
        if partial_allowed:
            partial_regex = re.compile('.*'.join(re.escape(c) for c in query_lower), re.IGNORECASE)

        def _score(node_name):
            # 0 = exact name match, 1 = substring match, 2 = fuzzy-only match.
            # (Previously duplicated inline in both match branches.)
            lowered = node_name.lower()
            if query_lower == lowered:
                return 0
            return 1 if query_lower in lowered else 2

        matches = []
        for nid, n in g.nodes(data=True):
            node = n['data']
            node_name = getattr(node, 'name', '')
            if not node_name:
                continue

            # Substring always matches; fuzzy only when partial_allowed.
            if query_lower not in node_name.lower() and not (
                    partial_regex is not None and partial_regex.search(node_name)):
                continue

            current_node_type = getattr(node, 'node_type', None)

            if current_node_type == 'entity':
                # Entity nodes may not carry their type directly; fall back
                # to the entity registry.
                entity_type = getattr(node, 'entity_type', '')
                if not entity_type and nid in knowledge_graph.entities:
                    entity_types = knowledge_graph.entities[nid].get('type', [])
                    entity_type = entity_types[0] if entity_types else ''
                if not (entity_type and entity_type.lower() == node_type.lower()):
                    continue
                type_label = f"entity ({entity_type})"
            elif current_node_type == node_type:
                type_label = current_node_type
            else:
                continue

            matches.append({
                "id": nid,
                "name": node_name,
                "type": type_label,
                "score": _score(node_name)
            })

        # Stable sort: best score first, then alphabetical by name.
        matches.sort(key=lambda x: (x['score'], x['name'].lower()))

        total = len(matches)
        if total == 0:
            return f"No matches for type '{node_type}' and name containing '{name_query}'."

        total_pages = (total + limit - 1) // limit
        if page > total_pages:
            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} results at {limit} per page)"

        start_idx = (page - 1) * limit
        page_slice = matches[start_idx:start_idx + limit]

        result = f"Matches for type '{node_type}' and name '{name_query}' (Page {page}/{total_pages}, {total} total):\n"
        result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        for i, match in enumerate(page_slice, start=start_idx + 1):
            result += f"{i}. {match['name']}\n"
            result += f" ID: {match['id']}\n"
            result += f" Type: {match['type']}\n\n"

        if page < total_pages:
            result += f"Use page={page + 1} to see the next page\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_chunk_context(node_id: str) -> str:
    """
    Get expanded code context by retrieving a chunk together with its neighbors.

    PURPOSE:
        Shows up to three consecutive chunks (previous + target + next) so you can
        see the code surrounding a chunk of interest. Chunks are logical code
        segments, and the surrounding code is often needed to fully understand an
        implementation (imports before it, continuation after it, enclosing class
        context, code flow across chunk boundaries).

    WHEN TO USE:
        - After search_nodes or get_node_info when more surrounding context is needed
        - When a chunk shows a partial function/class and the complete picture is required
        - To understand code flow across chunk boundaries

    TYPICAL WORKFLOW:
        1. search_nodes("attention forward") -> find relevant chunk
        2. get_node_info(chunk_id) -> see chunk content
        3. get_chunk_context(chunk_id) -> see surrounding code for fuller understanding

    COMPARISON WITH get_node_info:
        - get_node_info: single chunk content plus full metadata
        - get_chunk_context: expanded code view (prev + current + next chunks), less metadata

    Args:
        node_id: The chunk ID to get context for
                 (e.g., 'src/transformers/models/bert/modeling_bert.py::chunk_5')

    Returns:
        str: Combined content of previous, current, and next chunks organized by
             file, providing a seamless code view.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        if node_id not in knowledge_graph.graph:
            return f"Error: Node '{node_id}' not found in knowledge graph"

        graph = knowledge_graph.graph
        target = graph.nodes[node_id]['data']
        before = knowledge_graph.get_previous_chunk(node_id)
        after = knowledge_graph.get_next_chunk(node_id)

        # Build the context window: the neighbors are optional (first/last chunk
        # of a file has no previous/next), the target chunk is always present.
        window = ([before] if before else []) + [target] + ([after] if after else [])

        # Group by file, then stitch the chunks into one continuous view.
        return join_organized_chunks(organize_chunks_by_file_name(window))
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_file_stats(path: str) -> str:
    """
    Get detailed statistics and metrics for a specific file or directory.

    PURPOSE:
        Quantitative metrics for every graph node stored under the given path:
        line count, declared/called entity counts (with sample listings), and the
        number of chunks the file is divided into. Useful for judging size and
        complexity before exploring a file further.

    WHEN TO USE:
        - To assess the size and complexity of a file
        - To see summary counts of entities declared and called
        - To understand how a file is chunked
        - When deciding which files to explore further

    COMPARISON WITH get_file_structure:
        - get_file_stats: quantitative metrics (counts, numbers)
        - get_file_structure: qualitative overview (entity names, chunk IDs)

    Args:
        path: The file path to analyze. Must match the path as stored in the
              knowledge graph.

    Returns:
        str: Statistics including line count, declared entities, called entities,
             and chunk count.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        graph = knowledge_graph.graph
        # Collect every node whose 'path' attribute matches the requested path.
        file_nodes = [
            node_key for node_key, attrs in graph.nodes(data=True)
            if getattr(attrs['data'], 'path', None) == path
        ]

        if not file_nodes:
            return f"No nodes found for path '{path}'."

        parts = [f"Statistics for '{path}':\n", "━" * 40 + "\n\n"]

        for node_key in file_nodes:
            data = graph.nodes[node_key]['data']
            source = getattr(data, 'content', '')
            declared = getattr(data, 'declared_entities', [])
            called = getattr(data, 'called_entities', [])
            # Chunks are out-neighbors whose node_type is 'chunk'.
            chunk_total = sum(
                1 for _, dst in graph.out_edges(node_key)
                if getattr(graph.nodes[dst]['data'], 'node_type', None) == 'chunk'
            )

            parts.append(f"Node: {node_key} ({getattr(data, 'node_type', '?')})\n")
            parts.append(f"  Lines: {len(source.splitlines()) if source else 0}\n")
            parts.append(f"  Declared entities: {len(declared)}\n")

            if declared:
                # Show at most 10 samples; declared entries may be dicts or plain names.
                for entity in declared[:10]:
                    if isinstance(entity, dict):
                        parts.append(f"    - {entity.get('name', '?')} ({entity.get('type', '?')})\n")
                    else:
                        parts.append(f"    - {entity}\n")
                if len(declared) > 10:
                    parts.append(f"    ... and {len(declared) - 10} more\n")

            parts.append(f"  Called entities: {len(called)}\n")
            if called:
                for entity in called[:10]:
                    parts.append(f"    - {entity}\n")
                if len(called) > 10:
                    parts.append(f"    ... and {len(called) - 10} more\n")

            parts.append(f"  Chunks: {chunk_total}\n\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def find_path(source_id: str, target_id: str, max_depth: int = 5) -> str:
    """
    Find the shortest path between two nodes in the knowledge graph.

    PURPOSE:
        Discovers how two code elements are connected through the graph,
        revealing the chain of relationships linking two seemingly unrelated
        pieces of code.

    WHEN TO USE:
        - To understand how two classes/functions are related
        - To trace dependency chains between components
        - To verify whether two nodes are connected at all
        - For understanding code architecture and coupling

    WHAT YOU'LL GET:
        - Path length (number of hops)
        - Ordered list of nodes from source to target, rendered as a chain

    LIMITATIONS:
        - max_depth bounds the search to avoid long computations; if no path is
          found within max_depth, the nodes may still be connected via a longer
          path — increase max_depth for distant nodes.

    TYPICAL WORKFLOW:
        1. Identify two node IDs of interest
        2. find_path(source, target) -> discover connection
        3. get_node_info for nodes in the path to understand the relationship

    Args:
        source_id: Starting node ID (any node type)
        target_id: Destination node ID (any node type)
        max_depth: Maximum path length to search (default: 5). Increase for distant nodes.

    Returns:
        str: Path from source to target showing each node in sequence, or a
             message if no path was found.
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients sometimes pass numbers as strings; coerce defensively.
        if isinstance(max_depth, str):
            try:
                max_depth = int(max_depth)
            except ValueError:
                return f"Error: 'max_depth' must be an integer, got '{max_depth}'"

        outcome = knowledge_graph.find_path(source_id, target_id, max_depth)

        if "error" in outcome:
            return f"Error: {outcome['error']}"

        hops = outcome.get("path")
        if not hops:
            return f"No path found from '{source_id}' to '{target_id}' within depth {max_depth}"

        header = (
            f"Path from '{source_id}' to '{target_id}':\n"
            + "━" * 40 + "\n\n"
            + f"Length: {outcome['length']}\n\n"
        )
        # Render each hop on its own numbered line, with an arrow between
        # consecutive hops (join naturally omits the arrow after the last one).
        steps = [f"{idx}. {hop}\n" for idx, hop in enumerate(hops)]
        return header + "  ↓\n".join(steps)
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_subgraph(node_id: str, depth: int = 2, edge_types: Optional[str] = None) -> str:
    """
    Extract a local subgraph around a node up to a specified depth.

    PURPOSE:
        Gives a bounded view of the graph neighborhood around any node: all nodes
        reachable within a certain number of hops, optionally filtered by edge type.

    WHEN TO USE:
        - To understand the local network around a class or function
        - To see all nodes within N hops of a target node
        - When get_neighbors isn't enough and you need multi-hop exploration

    DEPTH EXPLANATION:
        - depth=1: Only immediate neighbors (same as get_neighbors)
        - depth=2: Neighbors and their neighbors (2 hops)
        - depth=3+: Larger neighborhood (exponentially more nodes)

    EDGE TYPE FILTERING:
        - Pass comma-separated edge types to filter: 'calls,declares'
          (whitespace around commas is tolerated, e.g. 'calls, declares')
        - Common types: 'calls', 'contains', 'declares'
        - Leave empty or None for all edge types

    OUTPUT:
        - Node count and edge count in the subgraph
        - List of all node IDs in the extracted subgraph (first 30 shown)

    COMPARISON WITH get_neighbors:
        - get_neighbors: Single hop, shows node details
        - get_subgraph: Multi-hop, shows subgraph structure and counts

    Args:
        node_id: Central node to build the subgraph around
        depth: Radius in hops from the central node (default: 2). Higher = larger subgraph.
        edge_types: Optional comma-separated filter: 'calls,contains,declares' or None for all

    Returns:
        str: Subgraph summary with node/edge counts and list of included node IDs
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients sometimes pass numbers as strings; coerce defensively.
        if isinstance(depth, str):
            try:
                depth = int(depth)
            except ValueError:
                return f"Error: 'depth' must be an integer, got '{depth}'"

        # FIX: a plain split(",") kept surrounding whitespace and empty segments,
        # so an input like 'calls, declares' silently filtered on ' declares' and
        # matched nothing. Strip each segment and drop empties; fall back to None
        # (no filtering) if nothing usable remains.
        edge_types_list = None
        if edge_types:
            edge_types_list = [t.strip() for t in edge_types.split(",") if t.strip()] or None

        subgraph_result = knowledge_graph.get_subgraph(node_id, depth, edge_types_list)

        if "error" in subgraph_result:
            return f"Error: {subgraph_result['error']}"

        result = f"Subgraph around '{node_id}' (depth: {depth}):\n"
        result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        result += f"Nodes: {subgraph_result['node_count']}\n"
        result += f"Edges: {subgraph_result['edge_count']}\n"

        if edge_types_list:
            result += f"Filtered by edge types: {', '.join(edge_types_list)}\n"

        # Cap the listing at 30 node IDs to keep tool output manageable.
        result += "\nNodes in subgraph:\n"
        for node in subgraph_result['nodes'][:30]:
            result += f"  - {node}\n"

        if len(subgraph_result['nodes']) > 30:
            result += f"  ... and {len(subgraph_result['nodes']) - 30} more\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def list_files_in_directory(directory_path: str = "", pattern: str = "*", recursive: bool = True, limit: int = 50, page: int = 1) -> str:
    """
    Browse and list files in the repository with flexible filtering options.

    PURPOSE:
    Use this tool to explore the file structure of the Transformers library.
    Supports directory scoping, glob patterns, and recursive/non-recursive modes.

    WHEN TO USE:
    - To see what files exist in a directory
    - To find files by pattern (e.g., all Python files, all test files)
    - To explore the repository structure directory by directory
    - To find specific file types in specific locations
    - When you need file paths for use with other tools

    FILTERING OPTIONS:

    directory_path:
    - Empty string '': Search all files in the repository
    - 'src/transformers/models': Only files under this directory
    - 'src/transformers/models/bert': Focus on a specific model

    pattern (glob patterns):
    - '*': All files (default)
    - '*.py': Python files only
    - 'test_*.py': Test files
    - '*config*': Files with 'config' in name
    - 'modeling_*.py': Modeling files

    recursive:
    - True (default): Include files in subdirectories
    - False: Only files directly in the specified directory

    COMMON USE CASES:
    - All files: list_files_in_directory()
    - Bert model files: list_files_in_directory('src/transformers/models/bert')
    - All Python files: list_files_in_directory(pattern='*.py')
    - Test files only: list_files_in_directory(pattern='test_*.py')
    - Config files: list_files_in_directory(pattern='*config*')

    COMPARISON WITH print_tree:
    - print_tree: Visual hierarchy, includes directories
    - list_files_in_directory: Flat file list with details, better for finding specific files

    Args:
        directory_path: Directory to search in. Empty string for entire repository.
        pattern: Glob pattern for filename filtering (default: '*' matches all)
        recursive: Search subdirectories (default: True)
        limit: Files per page (default: 50)
        page: Page number for pagination

    Returns:
        str: List of matching files with paths, languages, and entity counts
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may pass numeric parameters as strings; coerce defensively.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        # Booleans may also arrive as strings ('true'/'1'/'yes' => True).
        if isinstance(recursive, str):
            recursive = recursive.lower() in ('true', '1', 'yes')

        g = knowledge_graph.graph
        matching_files = []

        # Scan every graph node, keeping only 'file' nodes that pass the
        # directory and pattern filters.
        for nid, n in g.nodes(data=True):
            node = n['data']
            node_type = getattr(node, 'node_type', None)

            if node_type != 'file':
                continue

            # Fall back to the node ID when no explicit path is stored.
            file_path = getattr(node, 'path', nid)
            file_name = getattr(node, 'name', '')

            if directory_path:
                if recursive:
                    # Keep anything under directory_path/ (prefix match on the
                    # normalized directory) or the directory itself.
                    if not file_path.startswith(directory_path.rstrip('/') + '/') and file_path != directory_path:
                        continue
                else:
                    # Non-recursive: the file's immediate parent directory must
                    # equal directory_path exactly.
                    parent_dir = '/'.join(file_path.rsplit('/', 1)[:-1]) if '/' in file_path else ''
                    if parent_dir != directory_path.rstrip('/'):
                        continue

            if pattern and pattern != '*':
                # Match against the full path, the bare file name, and a
                # path-suffix form so patterns like 'test_*.py' work anywhere.
                if not (fnmatch.fnmatch(file_path, pattern) or
                        fnmatch.fnmatch(file_name, pattern) or
                        fnmatch.fnmatch(file_path, f'**/{pattern}')):
                    continue

            language = getattr(node, 'language', 'Unknown')
            declared_entities = getattr(node, 'declared_entities', [])

            matching_files.append({
                'path': file_path,
                'name': file_name,
                'language': language,
                'entity_count': len(declared_entities)
            })

        # Deterministic ordering so pagination is stable across calls.
        matching_files.sort(key=lambda x: x['path'])

        if not matching_files:
            filter_desc = f" in '{directory_path}'" if directory_path else ""
            pattern_desc = f" matching '{pattern}'" if pattern and pattern != '*' else ""
            return f"No files found{filter_desc}{pattern_desc}."

        total = len(matching_files)

        # Ceiling division for the page count.
        total_pages = (total + limit - 1) // limit
        if page > total_pages:
            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} files at {limit} per page)"

        start_idx = (page - 1) * limit
        end_idx = start_idx + limit
        page_slice = matching_files[start_idx:end_idx]

        result = f"Files"
        if directory_path:
            result += f" in '{directory_path}'"
        if pattern and pattern != '*':
            result += f" matching '{pattern}'"
        result += f" (Page {page}/{total_pages}, {total} total):\n"
        result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        # Item numbers continue across pages (start_idx + 1).
        for i, f in enumerate(page_slice, start=start_idx + 1):
            result += f"{i}. {f['path']}\n"
            result += f"   Language: {f['language']}, Entities: {f['entity_count']}\n\n"

        if page < total_pages:
            result += f"Use page={page + 1} to see the next page\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def find_files_importing(module_or_entity: str, limit: int = 30, page: int = 1) -> str:
    """
    Find all files that import or use a specific module, class, or function.

    PURPOSE:
    Use this tool to trace import dependencies and understand which parts of the
    codebase depend on a particular module or entity.

    WHEN TO USE:
    - To find all files that import a specific module (e.g., 'torch', 'numpy')
    - To trace dependencies on a class or function
    - To understand the impact scope of a module
    - To find usage patterns of external libraries
    - For dependency analysis and impact assessment

    SEARCH BEHAVIOR:
    - Searches through 'called_entities' metadata
    - Also scans code chunks for import statement patterns
    - Matches import, from...import, require, use patterns
    - Case-insensitive matching

    WHAT YOU'LL GET:
    - List of files that import/use the specified module or entity
    - Match type (called_entity or import_statement)
    - Matched entity names when applicable

    EXAMPLE QUERIES:
    - find_files_importing('torch') -> files using PyTorch
    - find_files_importing('numpy') -> files using NumPy
    - find_files_importing('BertModel') -> files using BertModel
    - find_files_importing('attention') -> files related to attention

    LIMITATIONS:
    - May not catch all dynamic imports
    - Pattern matching may have false positives/negatives
    - For comprehensive search, combine with search_nodes

    Args:
        module_or_entity: Name of the module, class, or function to search for (case-insensitive)
        limit: Maximum results per page (default: 30)
        page: Page number for pagination

    Returns:
        str: List of files that import or use the specified module/entity, with match details
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may pass numeric parameters as strings; coerce defensively.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        if isinstance(page, str):
            try:
                page = int(page)
            except ValueError:
                return f"Error: 'page' must be an integer, got '{page}'"

        if limit <= 0:
            return "Error: limit must be a positive integer"
        if page < 1:
            return "Error: 'page' must be a positive integer (1 or greater)"

        g = knowledge_graph.graph
        importing_files = []
        search_term = module_or_entity.lower()

        for nid, n in g.nodes(data=True):
            node = n['data']
            node_type = getattr(node, 'node_type', None)

            # Only file nodes are candidates.
            if node_type != 'file':
                continue

            file_path = getattr(node, 'path', nid)
            called_entities = getattr(node, 'called_entities', [])

            # First pass: substring match against the file's called_entities
            # metadata (entries may be dicts with a 'name' key or plain values).
            found_in_calls = False
            matched_entities = []
            for entity in called_entities:
                entity_str = str(entity).lower() if not isinstance(entity, dict) else entity.get('name', '').lower()
                if search_term in entity_str:
                    found_in_calls = True
                    matched_entities.append(entity_str)

            if found_in_calls:
                importing_files.append({
                    'path': file_path,
                    'name': getattr(node, 'name', ''),
                    'matched_entities': matched_entities[:5],  # cap samples for readability
                    'match_type': 'called_entity'
                })
                continue  # metadata match found; skip the content scan

            # Second pass (fallback): scan the first few chunks of the file for
            # import-like statements across languages (Python/JS/Rust styles).
            chunks = knowledge_graph.get_chunks_of_file(file_path) if hasattr(knowledge_graph, 'get_chunks_of_file') else []
            for chunk in chunks[:3]:
                content = getattr(chunk, 'content', '')

                import_patterns = [
                    rf'import\s+.*{re.escape(module_or_entity)}',
                    rf'from\s+.*{re.escape(module_or_entity)}.*\s+import',
                    rf'require\s*\(\s*["\'].*{re.escape(module_or_entity)}',
                    rf'use\s+.*{re.escape(module_or_entity)}',
                ]
                for pattern in import_patterns:
                    if re.search(pattern, content, re.IGNORECASE):
                        # De-duplicate: only record the file once.
                        if not any(f['path'] == file_path for f in importing_files):
                            importing_files.append({
                                'path': file_path,
                                'name': getattr(node, 'name', ''),
                                'matched_entities': [],
                                'match_type': 'import_statement'
                            })
                        break

        # Deterministic ordering so pagination is stable across calls.
        importing_files.sort(key=lambda x: x['path'])

        if not importing_files:
            return f"No files found importing '{module_or_entity}'.\n\nTip: Try searching for the module name in code content using search_nodes."

        total = len(importing_files)

        # Ceiling division for the page count.
        total_pages = (total + limit - 1) // limit
        if page > total_pages:
            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} files at {limit} per page)"

        start_idx = (page - 1) * limit
        end_idx = start_idx + limit
        page_slice = importing_files[start_idx:end_idx]

        result = f"Files importing '{module_or_entity}' (Page {page}/{total_pages}, {total} total):\n"
        result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        # Item numbers continue across pages (start_idx + 1).
        for i, f in enumerate(page_slice, start=start_idx + 1):
            result += f"{i}. {f['path']}\n"
            result += f"   Match type: {f['match_type']}\n"
            if f['matched_entities']:
                result += f"   Matched: {', '.join(f['matched_entities'][:3])}\n"
            result += "\n"

        if page < total_pages:
            result += f"Use page={page + 1} to see the next page\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
@observe(as_type="tool")
def get_concept_overview(concept: str, limit: int = 15) -> str:
    """
    Get a high-level overview of how a concept is implemented across the Transformers codebase.

    PURPOSE:
    Use this tool for broad exploration of a concept or feature. Aggregates related
    classes, functions, files, and code snippets into a single comprehensive view.
    Ideal for initial investigation of a topic.

    WHEN TO USE:
    - FIRST STEP when exploring a new concept (before detailed searches)
    - To understand how a feature is implemented across the codebase
    - To discover all components related to a concept
    - To get a bird's-eye view before diving into specifics
    - When you're not sure where to start investigating

    SEARCH STRATEGY:
    This tool combines multiple search approaches:
    - Searches entity names (classes, functions, methods) containing the concept
    - Searches file names and paths
    - Searches chunk content and descriptions
    - Aggregates results into categorized sections

    CONCEPT EXAMPLES:
    - 'attention' -> attention mechanisms across all models
    - 'embedding' -> embedding layers and utilities
    - 'tokenizer' -> tokenization components
    - 'generation' -> text generation utilities
    - 'config' -> configuration classes
    - 'cache' -> caching mechanisms
    - 'rope' -> rotary position embeddings
    - 'flash' -> flash attention implementations

    OUTPUT STRUCTURE:
    - Related Classes: Class definitions matching the concept
    - Related Functions/Methods: Functions matching the concept
    - Related Files: Files with concept in path/name
    - Code Snippets: Relevant code chunks

    TYPICAL WORKFLOW:
    1. get_concept_overview('attention') -> see all attention-related components
    2. Identify specific classes/functions of interest
    3. go_to_definition or search_nodes for detailed exploration

    Args:
        concept: The concept to explore (e.g., 'attention', 'embedding', 'generation', 'tokenizer')
        limit: Maximum items per category (default: 15)

    Returns:
        str: Categorized overview with related classes, functions, files, and code snippets
    """
    if knowledge_graph is None:
        return "Error: Knowledge graph not initialized"

    try:
        # MCP clients may pass numeric parameters as strings; coerce defensively.
        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                return f"Error: 'limit' must be an integer, got '{limit}'"

        g = knowledge_graph.graph
        concept_lower = concept.lower()

        # One bucket per output category; each is capped during the scan so a
        # single pass over the graph stays bounded.
        related_classes = []
        related_functions = []
        related_files = []
        related_chunks = []

        for nid, n in g.nodes(data=True):
            node = n['data']
            node_type = getattr(node, 'node_type', None)
            node_name = getattr(node, 'name', '')

            # Case-insensitive substring match on the node's name.
            name_match = concept_lower in node_name.lower()

            if node_type == 'entity':
                entity_type = getattr(node, 'entity_type', '')
                if name_match:
                    # Classes and functions/methods go into separate buckets.
                    if entity_type.lower() == 'class' and len(related_classes) < limit:
                        declaring = getattr(node, 'declaring_chunk_ids', [])
                        related_classes.append({
                            'name': node_name,
                            'id': nid,
                            'file': declaring[0] if declaring else 'Unknown'
                        })
                    elif entity_type.lower() in ('function', 'method') and len(related_functions) < limit:
                        declaring = getattr(node, 'declaring_chunk_ids', [])
                        related_functions.append({
                            'name': node_name,
                            'id': nid,
                            'type': entity_type,
                            'file': declaring[0] if declaring else 'Unknown'
                        })

            elif node_type == 'file' and len(related_files) < limit:
                # Files match on path as well as name.
                file_path = getattr(node, 'path', '')
                if concept_lower in file_path.lower() or name_match:
                    declared = getattr(node, 'declared_entities', [])
                    related_files.append({
                        'path': file_path,
                        'name': node_name,
                        'entity_count': len(declared)
                    })

            elif node_type == 'chunk' and len(related_chunks) < limit // 2:
                # Chunks match on content or description; capped at half the
                # limit since snippets are verbose.
                content = getattr(node, 'content', '')
                description = getattr(node, 'description', '')
                if concept_lower in content.lower() or concept_lower in (description or '').lower():
                    file_path = getattr(node, 'path', '')
                    related_chunks.append({
                        'id': nid,
                        'file': file_path,
                        'content': content
                    })

        result = f"Concept Overview: '{concept}'\n"
        result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        total = len(related_classes) + len(related_functions) + len(related_files) + len(related_chunks)
        result += f"Found {total} related items across the codebase.\n\n"

        if related_classes:
            result += f"📦 Related Classes ({len(related_classes)}):\n"
            for cls in related_classes[:10]:
                result += f"  • {cls['name']}\n"
                result += f"    File: {cls['file']}\n"
            if len(related_classes) > 10:
                result += f"  ... and {len(related_classes) - 10} more\n"
            result += "\n"

        if related_functions:
            result += f"⚡ Related Functions/Methods ({len(related_functions)}):\n"
            for func in related_functions[:10]:
                result += f"  • {func['name']} ({func['type']})\n"
                result += f"    File: {func['file']}\n"
            if len(related_functions) > 10:
                result += f"  ... and {len(related_functions) - 10} more\n"
            result += "\n"

        if related_files:
            result += f"📄 Related Files ({len(related_files)}):\n"
            for f in related_files[:10]:
                result += f"  • {f['path']}\n"
                result += f"    Entities: {f['entity_count']}\n"
            if len(related_files) > 10:
                result += f"  ... and {len(related_files) - 10} more\n"
            result += "\n"

        if related_chunks:
            result += f"📝 Code Snippets ({len(related_chunks)}):\n"
            for chunk in related_chunks[:5]:
                result += f"  • {chunk['id']}\n"
                result += f"    Content:\n{chunk['content']}\n\n"
            if len(related_chunks) > 5:
                result += f"  ... and {len(related_chunks) - 5} more\n"

        # Nothing matched: point the caller at alternative search tools.
        if total == 0:
            result += "No direct matches found.\n\n"
            result += "Suggestions:\n"
            result += f"  • Try searching with: search_nodes('{concept}')\n"
            result += f"  • Try partial name: search_by_type_and_name('class', '{concept[:4]}')\n"
            result += f"  • Check entity list: list_all_entities(entity_type='class')\n"

        return result
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
def _get_header_explorer(): |
|
|
html = """ |
|
|
<style> |
|
|
.kge-header-container { |
|
|
background: linear-gradient(314deg, #64748b 0%, #373f4a 100%); |
|
|
padding: 28px 22px; |
|
|
border-radius: 16px; |
|
|
color: white !important; |
|
|
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), |
|
|
0 4px 6px -2px rgba(0, 0, 0, 0.05); |
|
|
margin-bottom: 25px; |
|
|
font-family: 'Inter', -apple-system, sans-serif; |
|
|
text-align: center; |
|
|
} |
|
|
|
|
|
.kge-header-title { |
|
|
font-size: 30px; |
|
|
font-weight: 700; |
|
|
margin-bottom: 8px; |
|
|
} |
|
|
|
|
|
.kge-header-subtitle { |
|
|
font-size: 17px; |
|
|
font-weight: 400; |
|
|
margin-bottom: 6px; |
|
|
} |
|
|
|
|
|
.kge-header-link a { |
|
|
color: #d7e8ff; |
|
|
font-weight: 600; |
|
|
text-decoration: none; |
|
|
} |
|
|
|
|
|
.kge-header-link a:hover { |
|
|
text-decoration: underline; |
|
|
} |
|
|
</style> |
|
|
|
|
|
<div class="kge-header-container"> |
|
|
<div class="kge-header-title">Code Knowledge Graph Explorer — 🤗 Transformers Library</div> |
|
|
<div class="kge-header-subtitle"> |
|
|
Explore, query, and understand the structure of the Hugging Face Transformers codebase. |
|
|
</div> |
|
|
<div class="kge-header-link"> |
|
|
<a href="https://www.epita.fr/" target="_blank">EPITA Website</a> |
|
|
</div> |
|
|
</div> |
|
|
""" |
|
|
return html |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_gradio_app():
    """Create and configure the Gradio interface.

    Builds a tabbed UI where each tab wires plain Gradio widgets to the
    module-level knowledge-graph tool functions (search_nodes,
    get_node_info, ...). Returns the gr.Blocks app; the caller is
    responsible for launching it.

    Fixes applied in review:
      * Renamed locals that shadowed components from earlier tabs
        (Files tab's stats_btn/stats_output, Discovery tab's
        search_limit/search_page) — the old code only worked because
        .click() was wired before the names were rebound.
      * "#Get Node Info:" / "#Get Node Edges:" were malformed Markdown
        (no space after '#', doc text concatenated onto the same line);
        now proper headings with a separating blank line.
    """
    with gr.Blocks(title="", theme=gr.themes.Soft()) as demo:
        gr.HTML(_get_header_explorer())

        def _tool_doc_md(func):
            # Render a tool function's docstring as a fenced Markdown block
            # so each tab shows the same description an MCP client sees.
            doc = (func.__doc__ or "No description available.").strip()
            return f"**Description:**\n\n```\n{doc}\n```"

        gr.Markdown("""
        Understanding large codebases is essential for software engineers. This Space presents a Code Knowledge Graph MCP Server built around the Hugging Face Transformers library (4,000+ files, 400k+ lines of code). It enables LLM-based coding agents to analyze code structure, follow dependencies, and spot potential improvements. Developed initially for EPITA coding courses, these capabilities make it easier to review, navigate, and understand complex projects such as the Transformers library.
        """)

        with gr.Tab("📊 Graph Overview"):
            stats_btn = gr.Button("Get Graph Statistics", variant="primary")
            stats_output = gr.Textbox(label="Statistics", lines=20, max_lines=30)
            stats_btn.click(fn=get_graph_stats, outputs=stats_output)
            gr.Markdown(_tool_doc_md(get_graph_stats))

        with gr.Tab("🔎 Search"):
            with gr.Row():
                with gr.Column():
                    search_query = gr.Textbox(label="Search Query", placeholder="Enter search query...")
                    search_limit = gr.Slider(1, 50, value=10, step=1, label="Results per Page")
                    search_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    search_btn = gr.Button("Search", variant="primary")
                with gr.Column():
                    search_output = gr.Textbox(label="Search Results", lines=20, max_lines=30)
            search_btn.click(fn=search_nodes, inputs=[search_query, search_limit, search_page], outputs=search_output)
            gr.Markdown(_tool_doc_md(search_nodes))

        with gr.Tab("📝 Node Info"):
            with gr.Row():
                with gr.Column():
                    node_id_input = gr.Textbox(label="Node ID", placeholder="Enter node ID...")
                    node_info_btn = gr.Button("Get Node Info", variant="primary")
                    node_edges_btn = gr.Button("Get Node Edges", variant="secondary")
                with gr.Column():
                    node_output = gr.Textbox(label="Node Information", lines=20, max_lines=30)
            node_info_btn.click(fn=get_node_info, inputs=node_id_input, outputs=node_output)
            # BUGFIX: was "#Get Node Info:" + doc — no space after '#' (not a
            # valid Markdown heading) and no newline before the doc block.
            gr.Markdown("### Get Node Info\n\n" + _tool_doc_md(get_node_info))
            node_edges_btn.click(fn=get_node_edges, inputs=node_id_input, outputs=node_output)
            gr.Markdown("### Get Node Edges\n\n" + _tool_doc_md(get_node_edges))

        with gr.Tab("🏗️ Structure"):
            gr.Markdown("### Repository Tree")
            with gr.Row():
                with gr.Column():
                    tree_root = gr.Textbox(label="Root Node ID", value="root", placeholder="root")
                    tree_depth = gr.Slider(1, 10, value=3, step=1, label="Max Depth")
                    tree_btn = gr.Button("Show Tree", variant="primary")
                with gr.Column():
                    tree_output = gr.Textbox(label="Tree View", lines=20, max_lines=40)
            tree_btn.click(fn=print_tree, inputs=[tree_root, tree_depth], outputs=tree_output)
            gr.Markdown(_tool_doc_md(print_tree))

            gr.Markdown("---")
            gr.Markdown("### File Structure")
            with gr.Row():
                with gr.Column():
                    file_path_input = gr.Textbox(label="File Path", placeholder="Enter file path...")
                    file_structure_btn = gr.Button("Get File Structure", variant="primary")
                with gr.Column():
                    file_structure_output = gr.Textbox(label="File Structure", lines=20, max_lines=30)
            file_structure_btn.click(fn=get_file_structure, inputs=file_path_input, outputs=file_structure_output)
            gr.Markdown(_tool_doc_md(get_file_structure))

        with gr.Tab("🎯 Entities"):
            gr.Markdown("### List All Entities")
            with gr.Row():
                with gr.Column():
                    entity_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    entity_limit = gr.Slider(10, 100, value=50, step=10, label="Per Page")
                    entity_type_filter = gr.Dropdown(
                        choices=["", "class", "function", "method", "variable", "parameter"],
                        label="Filter by Type (optional)", value=""
                    )
                    # "true"/"false" strings (not booleans) — presumably parsed
                    # by list_all_entities itself; empty string means "no filter".
                    declared_in_repo = gr.Dropdown(
                        choices=["", "true", "false"],
                        label="Declared in Repo (optional)",
                        value=""
                    )
                    called_in_repo = gr.Dropdown(
                        choices=["", "true", "false"],
                        label="Called in Repo (optional)",
                        value=""
                    )
                    list_entities_btn = gr.Button("List Entities", variant="primary")
                with gr.Column():
                    list_entities_output = gr.Textbox(label="Entities", lines=20, max_lines=30)
            list_entities_btn.click(
                fn=list_all_entities,
                inputs=[entity_limit, entity_page, entity_type_filter, declared_in_repo, called_in_repo],
                outputs=list_entities_output,
            )
            gr.Markdown(_tool_doc_md(list_all_entities))

            gr.Markdown("---")
            gr.Markdown("### Go to Definition")
            with gr.Row():
                with gr.Column():
                    entity_name_def = gr.Textbox(label="Entity Name", placeholder="Enter entity name...")
                    def_btn = gr.Button("Go to Definition", variant="primary")
                with gr.Column():
                    def_output = gr.Textbox(label="Definition", lines=15, max_lines=25)
            def_btn.click(fn=go_to_definition, inputs=entity_name_def, outputs=def_output)
            gr.Markdown(_tool_doc_md(go_to_definition))

            gr.Markdown("---")
            gr.Markdown("### Find Usages")
            with gr.Row():
                with gr.Column():
                    entity_name_usage = gr.Textbox(label="Entity Name", placeholder="Enter entity name...")
                    usage_limit = gr.Slider(1, 50, value=20, step=1, label="Results per Page")
                    usage_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    usage_btn = gr.Button("Find Usages", variant="primary")
                with gr.Column():
                    usage_output = gr.Textbox(label="Usages", lines=15, max_lines=25)
            usage_btn.click(fn=find_usages, inputs=[entity_name_usage, usage_limit, usage_page], outputs=usage_output)
            gr.Markdown(_tool_doc_md(find_usages))

        with gr.Tab("🔬 Discovery"):
            gr.Markdown("### List Nodes by Type")
            with gr.Row():
                with gr.Column():
                    node_type_input = gr.Dropdown(
                        choices=["file", "directory", "chunk", "function", "class", "method"],
                        label="Node Type"
                    )
                    type_limit = gr.Slider(1, 100, value=20, step=1, label="Max Results")
                    type_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    type_btn = gr.Button("List Nodes", variant="primary")
                with gr.Column():
                    type_output = gr.Textbox(label="Results", lines=20, max_lines=30)
            type_btn.click(fn=list_nodes_by_type, inputs=[node_type_input, type_limit, type_page], outputs=type_output)
            gr.Markdown(_tool_doc_md(list_nodes_by_type))

            gr.Markdown("---")
            gr.Markdown("### Search by Type and Name")
            with gr.Row():
                with gr.Column():
                    search_type = gr.Dropdown(
                        choices=["file", "directory", "chunk", "function", "class", "method"],
                        label="Node Type"
                    )
                    search_name = gr.Textbox(label="Name Contains", placeholder="Enter partial name...")
                    # BUGFIX: renamed from search_limit/search_page, which
                    # shadowed the Search tab's components of the same name.
                    sbn_limit = gr.Slider(1, 100, value=10, step=1, label="Max Results")
                    sbn_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    search_partial_allowed = gr.Checkbox(label="Partial Match", value=True)
                    search_type_btn = gr.Button("Search", variant="primary")
                with gr.Column():
                    search_type_output = gr.Textbox(label="Results", lines=20, max_lines=30)
            search_type_btn.click(fn=search_by_type_and_name, inputs=[search_type, search_name, sbn_limit, sbn_page, search_partial_allowed], outputs=search_type_output)
            gr.Markdown(_tool_doc_md(search_by_type_and_name))

        with gr.Tab("🔗 Relationships"):
            gr.Markdown("### Get Neighbors")
            with gr.Row():
                with gr.Column():
                    neighbor_node_id = gr.Textbox(label="Node ID", placeholder="Enter node ID...")
                    neighbor_limit = gr.Slider(1, 100, value=20, step=1, label="Max Results")
                    neighbor_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    neighbor_btn = gr.Button("Get Neighbors", variant="primary")
                with gr.Column():
                    neighbor_output = gr.Textbox(label="Neighbors", lines=20, max_lines=30)
            neighbor_btn.click(fn=get_neighbors, inputs=[neighbor_node_id, neighbor_limit, neighbor_page], outputs=neighbor_output)
            gr.Markdown(_tool_doc_md(get_neighbors))

            gr.Markdown("---")
            gr.Markdown("### Entity Relationships")
            with gr.Row():
                with gr.Column():
                    rel_node_id = gr.Textbox(label="Node ID", placeholder="Enter node ID...")
                    rel_btn = gr.Button("Get Relationships", variant="primary")
                with gr.Column():
                    rel_output = gr.Textbox(label="Relationships", lines=20, max_lines=30)
            rel_btn.click(fn=entity_relationships, inputs=rel_node_id, outputs=rel_output)
            gr.Markdown(_tool_doc_md(entity_relationships))

            gr.Markdown("---")
            gr.Markdown("### Get Related Chunks")
            with gr.Row():
                with gr.Column():
                    related_chunk_id = gr.Textbox(label="Chunk ID", placeholder="Enter chunk ID...")
                    relation_type = gr.Dropdown(choices=["", "calls", "contains", "declares", "uses"], label="Relation Type", value="calls")
                    related_limit = gr.Slider(1, 100, value=20, step=1, label="Results per Page")
                    related_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    related_btn = gr.Button("Get Related Chunks", variant="primary")
                with gr.Column():
                    related_output = gr.Textbox(label="Related Chunks", lines=20, max_lines=30)
            related_btn.click(fn=get_related_chunks, inputs=[related_chunk_id, relation_type, related_limit, related_page], outputs=related_output)
            gr.Markdown(_tool_doc_md(get_related_chunks))

            gr.Markdown("---")
            gr.Markdown("### Find Path Between Nodes")
            with gr.Row():
                with gr.Column():
                    path_source = gr.Textbox(label="Source Node ID", placeholder="Enter source node ID...")
                    path_target = gr.Textbox(label="Target Node ID", placeholder="Enter target node ID...")
                    path_depth = gr.Slider(1, 10, value=5, step=1, label="Max Depth")
                    path_btn = gr.Button("Find Path", variant="primary")
                with gr.Column():
                    path_output = gr.Textbox(label="Path", lines=20, max_lines=30)
            path_btn.click(fn=find_path, inputs=[path_source, path_target, path_depth], outputs=path_output)
            gr.Markdown(_tool_doc_md(find_path))

        with gr.Tab("📖 Context"):
            gr.Markdown("### Get Chunk Context")
            with gr.Row():
                with gr.Column():
                    chunk_id_input = gr.Textbox(label="Chunk ID", placeholder="Enter chunk ID...")
                    context_btn = gr.Button("Get Context", variant="primary")
                with gr.Column():
                    context_output = gr.Textbox(label="Context", lines=25, max_lines=40)
            context_btn.click(fn=get_chunk_context, inputs=chunk_id_input, outputs=context_output)
            gr.Markdown(_tool_doc_md(get_chunk_context))

            gr.Markdown("---")
            gr.Markdown("### Concept Overview")
            with gr.Row():
                with gr.Column():
                    concept_input = gr.Textbox(label="Concept", placeholder="e.g., embedding, authentication...")
                    concept_btn = gr.Button("Get Overview", variant="primary")
                with gr.Column():
                    concept_output = gr.Textbox(label="Concept Overview", lines=25, max_lines=40)
            concept_btn.click(fn=get_concept_overview, inputs=concept_input, outputs=concept_output)
            gr.Markdown(_tool_doc_md(get_concept_overview))

            gr.Markdown("---")
            gr.Markdown("### Get Subgraph")
            with gr.Row():
                with gr.Column():
                    subgraph_node = gr.Textbox(label="Center Node ID", placeholder="Enter node ID...")
                    subgraph_depth = gr.Slider(1, 5, value=2, step=1, label="Depth")
                    subgraph_edge_types = gr.Textbox(label="Edge Types (comma-separated, optional)", placeholder="e.g., calls,contains")
                    subgraph_btn = gr.Button("Retrieve Subgraph", variant="primary")
                with gr.Column():
                    subgraph_output = gr.Textbox(label="Subgraph", lines=20, max_lines=30)
            subgraph_btn.click(fn=get_subgraph, inputs=[subgraph_node, subgraph_depth, subgraph_edge_types], outputs=subgraph_output)
            gr.Markdown(_tool_doc_md(get_subgraph))

        with gr.Tab("📁 Files"):
            gr.Markdown("### List Files in Directory")
            with gr.Row():
                with gr.Column():
                    dir_path = gr.Textbox(label="Directory Path (empty for root)", placeholder="e.g., src/")
                    file_pattern = gr.Textbox(label="Pattern", value="*", placeholder="e.g., *.py")
                    file_recursive = gr.Checkbox(label="Recursive", value=True)
                    file_limit = gr.Slider(10, 100, value=50, step=10, label="Results per Page")
                    file_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    list_files_btn = gr.Button("List Files", variant="primary")
                with gr.Column():
                    list_files_output = gr.Textbox(label="Files", lines=20, max_lines=30)
            list_files_btn.click(fn=list_files_in_directory, inputs=[dir_path, file_pattern, file_recursive, file_limit, file_page], outputs=list_files_output)
            gr.Markdown(_tool_doc_md(list_files_in_directory))

            gr.Markdown("---")
            gr.Markdown("### Find Files Importing")
            with gr.Row():
                with gr.Column():
                    import_module = gr.Textbox(label="Module/Entity Name", placeholder="e.g., torch, numpy...")
                    import_limit = gr.Slider(10, 50, value=30, step=5, label="Results per Page")
                    import_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                    find_imports_btn = gr.Button("Find Files", variant="primary")
                with gr.Column():
                    find_imports_output = gr.Textbox(label="Importing Files", lines=20, max_lines=30)
            find_imports_btn.click(fn=find_files_importing, inputs=[import_module, import_limit, import_page], outputs=find_imports_output)
            gr.Markdown(_tool_doc_md(find_files_importing))

            gr.Markdown("---")
            gr.Markdown("### Get File Stats")
            with gr.Row():
                with gr.Column():
                    stats_path = gr.Textbox(label="File Path", placeholder="Enter file path...")
                    # BUGFIX: renamed from stats_btn/stats_output, which
                    # shadowed the Graph Overview tab's components.
                    file_stats_btn = gr.Button("Get Stats", variant="primary")
                with gr.Column():
                    file_stats_output = gr.Textbox(label="Statistics", lines=20, max_lines=30)
            file_stats_btn.click(fn=get_file_stats, inputs=stats_path, outputs=file_stats_output)
            gr.Markdown(_tool_doc_md(get_file_stats))

        with gr.Tab("🔍 Analysis"):
            gr.Markdown("### Diff Chunks")
            with gr.Row():
                with gr.Column():
                    diff_node1 = gr.Textbox(label="First Node ID", placeholder="Enter first node ID...")
                    diff_node2 = gr.Textbox(label="Second Node ID", placeholder="Enter second node ID...")
                    diff_btn = gr.Button("Show Diff", variant="primary")
                with gr.Column():
                    diff_output = gr.Textbox(label="Diff Output", lines=25, max_lines=40)
            diff_btn.click(fn=diff_chunks, inputs=[diff_node1, diff_node2], outputs=diff_output)
            gr.Markdown(_tool_doc_md(diff_chunks))

    return demo
|
|
|
|
|
|
|
|
def main():
    """CLI entry point: load the knowledge graph and launch the Gradio MCP server.

    Parses command-line arguments (with HF_DATASET / HF_TOKEN environment
    fallbacks), initializes the module-level knowledge graph from the
    HuggingFace dataset, then launches the Gradio app with MCP enabled.
    Blocks until the server is stopped.
    """
    parser = argparse.ArgumentParser(description="Knowledge Graph MCP Server from HuggingFace Dataset")

    # Dataset location / credentials (env vars act as defaults).
    parser.add_argument("--hf-dataset", type=str, default=os.environ.get("HF_DATASET"),
                        help="HuggingFace dataset repo ID (e.g., 'username/dataset-name')")
    parser.add_argument("--hf-token", type=str, default=os.environ.get("HF_TOKEN"),
                        help="HuggingFace API token for private datasets (or set HF_TOKEN env var)")

    # Server binding options.
    parser.add_argument("--host", type=str, default="0.0.0.0", help="Host to bind to")
    parser.add_argument("--port", type=int, default=7860, help="Port to bind to")
    parser.add_argument("--share", action="store_true", help="Create a public link")

    # Code-index configuration.
    parser.add_argument("--no-index", action="store_true", help="Skip indexing nodes")
    parser.add_argument("--code-index-type", type=str, default="keyword-only",
                        choices=["keyword-only", "embedding-only", "hybrid"],
                        help="Type of code index to use")
    parser.add_argument("--code-index-backend", type=str, default="lancedb",
                        choices=["lancedb", "weaviate"],
                        help="Backend for code index")

    args = parser.parse_args()

    # ROBUSTNESS FIX: previously a missing dataset (no flag and no HF_DATASET
    # env var) passed None into initialize_knowledge_graph() and failed far
    # from the cause. Fail fast with a usage message instead.
    if not args.hf_dataset:
        parser.error("--hf-dataset is required (or set the HF_DATASET environment variable)")

    code_index_kwargs = {
        "index_type": args.code_index_type,
        "backend": args.code_index_backend,
        # Embeddings are only needed for the non-keyword index types.
        "use_embed": args.code_index_type != "keyword-only",
    }

    print("Initializing knowledge graph from HuggingFace dataset...")
    initialize_knowledge_graph(
        hf_dataset=args.hf_dataset,
        hf_token=args.hf_token,
        index_nodes=not args.no_index,
        code_index_kwargs=code_index_kwargs
    )
    print("Knowledge graph initialized!")

    demo = create_gradio_app()
    # mcp_server=True exposes the registered tools over the MCP protocol
    # in addition to serving the web UI.
    demo.launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share,
        mcp_server=True
    )
|
|
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|
|