Spaces:

MCP-1st-Birthday
/

code-knowledge-graph-explorer-transformers-library

Sleeping

App Files Files Community

lailaelkoussy committed on Nov 29, 2025

Commit

547246f

1 Parent(s): a0b7cd2

add pagination + better graph stats

Browse files

Files changed (1) hide show

gradio_mcp_space.py +260 -173

gradio_mcp_space.py CHANGED Viewed

@@ -245,7 +245,7 @@ Incoming Edges ({len(incoming)}):
 @observe(as_type="tool")
-def search_nodes(query: str, limit: int = 10) -> str:
     """
     Search for chunk nodes in the knowledge graph by query string.
@@ -253,7 +253,8 @@ def search_nodes(query: str, limit: int = 10) -> str:
     Args:
         query: The search string to match against code index
-        limit: Maximum number of results to return (default: 10)
     Returns:
         str: A formatted string with search results
@@ -269,39 +270,69 @@ def search_nodes(query: str, limit: int = 10) -> str:
             except ValueError:
                 return f"Error: 'limit' must be an integer, got '{limit}'"
         if limit <= 0:
             return "Error: limit must be a positive integer"
-        results = knowledge_graph.code_index.query(query, n_results=limit)
         metadatas = results.get("metadatas", [[]])[0]
         if not metadatas:
             return f"No results found for '{query}'."
-        result = f"Search Results for '{query}' ({len(metadatas)} results):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
-        for i, res in enumerate(metadatas, 1):
             result += f"{i}. ID: {res.get('id', 'N/A')}\n"
             content = res.get('content', '')
             if content:
                 result += f"   Content: {content}\n"
             result += "\n"
         return result
     except Exception as e:
         return f"Error: {str(e)}"
 @observe(as_type="tool")
 def get_graph_stats() -> str:
     """
-    Get overall statistics about the knowledge graph.
-    Includes node and edge counts, types, and relations.
     Returns:
-        str: A formatted string with graph statistics
     """
     if knowledge_graph is None:
         return "Error: Knowledge graph not initialized"
@@ -311,30 +342,92 @@ def get_graph_stats() -> str:
         num_nodes = g.number_of_nodes()
         num_edges = g.number_of_edges()
         node_types = {}
         for _, node_attrs in g.nodes(data=True):
             node_type = getattr(node_attrs['data'], 'node_type', 'Unknown')
             node_types[node_type] = node_types.get(node_type, 0) + 1
         edge_relations = {}
         for _, _, attrs in g.edges(data=True):
             relation = attrs.get('relation', 'Unknown')
             edge_relations[relation] = edge_relations.get(relation, 0) + 1
         result = f"""Knowledge Graph Statistics:
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-Total Nodes: {num_nodes}
-Total Edges: {num_edges}
-Node Types:
 """
         for ntype, count in sorted(node_types.items(), key=lambda x: x[1], reverse=True):
-            result += f"  - {ntype}: {count}\n"
-        result += "\nEdge Relations:\n"
         for relation, count in sorted(edge_relations.items(), key=lambda x: x[1], reverse=True):
-            result += f"  - {relation}: {count}\n"
         return result
     except Exception as e:
@@ -541,7 +634,7 @@ def go_to_definition(entity_name: str) -> str:
 @observe(as_type="tool")
-def find_usages(entity_name: str, limit: int = 20) -> str:
     """
     Retrieve all usages or calls of an entity in the codebase.
@@ -549,7 +642,8 @@ def find_usages(entity_name: str, limit: int = 20) -> str:
     Args:
         entity_name: The name of the entity to retrieve usages for
-        limit: Maximum number of usages to return (default: 20)
     Returns:
         str: A formatted string with usage locations
@@ -565,11 +659,20 @@ def find_usages(entity_name: str, limit: int = 20) -> str:
             except ValueError:
                 return f"Error: 'limit' must be an integer, got '{limit}'"
         if entity_name not in knowledge_graph.entities:
             return f"Error: Entity '{entity_name}' not found in knowledge graph"
         if limit <= 0:
             return "Error: limit must be a positive integer"
         entity_info = knowledge_graph.entities[entity_name]
         calling_chunks = entity_info.get('calling_chunk_ids', [])
@@ -577,17 +680,28 @@ def find_usages(entity_name: str, limit: int = 20) -> str:
         if not calling_chunks:
             return f"Entity '{entity_name}' found but no usages identified."
-        result = f"Usages of '{entity_name}' ({len(calling_chunks)} total):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
-        for i, chunk_id in enumerate(calling_chunks[:limit], 1):
             if chunk_id in knowledge_graph.graph:
                 chunk = knowledge_graph.graph.nodes[chunk_id]['data']
                 result += f"{i}. {chunk.path} (chunk {chunk.order_in_file})\n"
                 result += f"   Content:\n{chunk.content}\n\n"
-        if len(calling_chunks) > limit:
-            result += f"... and {len(calling_chunks) - limit} more usages\n"
         return result
     except Exception as e:
@@ -649,7 +763,7 @@ def get_file_structure(file_path: str) -> str:
 @observe(as_type="tool")
-def get_related_chunks(chunk_id: str, relation_type: str = "calls") -> str:
     """
     Retrieve chunks related to a given chunk by a specific relationship.
@@ -658,6 +772,8 @@ def get_related_chunks(chunk_id: str, relation_type: str = "calls") -> str:
     Args:
         chunk_id: The ID of the chunk to retrieve related chunks for
         relation_type: The type of relationship to filter by (default: 'calls')
     Returns:
         str: A formatted string with related chunks
@@ -669,6 +785,24 @@ def get_related_chunks(chunk_id: str, relation_type: str = "calls") -> str:
         if chunk_id not in knowledge_graph.graph:
             return f"Error: Chunk '{chunk_id}' not found in knowledge graph"
         related = []
         if relation_type == "" or relation_type == "all":
             # Get all outgoing edges regardless of relation type
@@ -692,18 +826,29 @@ def get_related_chunks(chunk_id: str, relation_type: str = "calls") -> str:
         if not related:
             return f"No chunks found with '{relation_type}' relationship from '{chunk_id}'"
-        result = f"Chunks related to '{chunk_id}' via '{relation_type}' ({len(related)} total):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
-        for i, chunk in enumerate(related[:15], 1):
             result += f"{i}. {chunk['id']}\n"
             result += f"   File: {chunk['file_path']}\n"
             if chunk['entity_name']:
                 result += f"   Entity: {chunk['entity_name']}\n"
             result += "\n"
-        if len(related) > 15:
-            result += f"... and {len(related) - 15} more\n"
         return result
     except Exception as e:
@@ -1057,9 +1202,9 @@ def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10, pa
             except ValueError:
                 return f"Error: 'page' must be an integer, got '{page}'"
-        # Convert fuzzy to bool if it's a string
-        if isinstance(fuzzy, str):
-            fuzzy = fuzzy.lower() in ('true', '1', 'yes')
         if limit <= 0:
             return "Error: limit must be a positive integer"
@@ -1070,13 +1215,13 @@ def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10, pa
         matches = []
         query_lower = name_query.lower()
-        # Build regex pattern for fuzzy matching
         # This will match names containing all characters of the query in order
-        if fuzzy:
             # Create pattern that matches query as substring or with characters spread out
             # e.g., "Embed" matches "Embedding", "BertEmbeddings", "EmbedLayer"
-            fuzzy_pattern = '.*'.join(re.escape(c) for c in query_lower)
-            fuzzy_regex = re.compile(fuzzy_pattern, re.IGNORECASE)
         for nid, n in g.nodes(data=True):
             node = n['data']
@@ -1087,9 +1232,9 @@ def search_by_type_and_name(node_type: str, name_query: str, limit: int = 10, pa
             # Check if name matches the query
             name_matches = False
-            if fuzzy:
-                # Fuzzy match: substring match OR regex pattern match
-                if query_lower in node_name.lower() or fuzzy_regex.search(node_name):
                     name_matches = True
             else:
                 # Exact substring match
@@ -1371,7 +1516,7 @@ def get_subgraph(node_id: str, depth: int = 2, edge_types: Optional[str] = None)
 @observe(as_type="tool")
-def list_files_in_directory(directory_path: str = "", pattern: str = "*", recursive: bool = True, limit: int = 50) -> str:
     """
     List files in a directory with optional glob pattern matching.
@@ -1382,7 +1527,8 @@ def list_files_in_directory(directory_path: str = "", pattern: str = "*", recurs
         directory_path: Path to the directory to list (empty string for root/all files)
         pattern: Glob pattern to filter files (e.g., '*.py', 'test_*.py', '**/*.js')
         recursive: Whether to search recursively in subdirectories (default: True)
-        limit: Maximum number of files to return (default: 50)
     Returns:
         str: A formatted string with matching files
@@ -1398,6 +1544,18 @@ def list_files_in_directory(directory_path: str = "", pattern: str = "*", recurs
             except ValueError:
                 return f"Error: 'limit' must be an integer, got '{limit}'"
         # Convert recursive to bool if it's a string
         if isinstance(recursive, str):
             recursive = recursive.lower() in ('true', '1', 'yes')
@@ -1445,9 +1603,6 @@ def list_files_in_directory(directory_path: str = "", pattern: str = "*", recurs
                 'language': language,
                 'entity_count': len(declared_entities)
             })
-            if len(matching_files) >= limit:
-                break
         # Sort by path for consistent ordering
         matching_files.sort(key=lambda x: x['path'])
@@ -1457,118 +1612,31 @@ def list_files_in_directory(directory_path: str = "", pattern: str = "*", recurs
             pattern_desc = f" matching '{pattern}'" if pattern and pattern != '*' else ""
             return f"No files found{filter_desc}{pattern_desc}."
         result = f"Files"
         if directory_path:
             result += f" in '{directory_path}'"
         if pattern and pattern != '*':
             result += f" matching '{pattern}'"
-        result += f" ({len(matching_files)} results):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
-        for i, f in enumerate(matching_files, 1):
             result += f"{i}. {f['path']}\n"
             result += f"   Language: {f['language']}, Entities: {f['entity_count']}\n\n"
-        return result
-    except Exception as e:
-        return f"Error: {str(e)}"
-@observe(as_type="tool")
-def find_classes_inheriting_from(base_class_name: str, limit: int = 20) -> str:
-    """
-    Retrieve all classes that inherit from a given base class.
-    Searches the knowledge graph for class entities that have the specified
-    base class in their inheritance chain.
-    Args:
-        base_class_name: The name of the base class to retrieve subclasses of
-        limit: Maximum number of results to return (default: 20)
-    Returns:
-        str: A formatted string with classes inheriting from the base class
-    """
-    if knowledge_graph is None:
-        return "Error: Knowledge graph not initialized"
-    try:
-        # Convert limit to int if it's a string
-        if isinstance(limit, str):
-            try:
-                limit = int(limit)
-            except ValueError:
-                return f"Error: 'limit' must be an integer, got '{limit}'"
-        g = knowledge_graph.graph
-        inheriting_classes = []
-        base_lower = base_class_name.lower()
-        # First, find all class entities
-        for nid, n in g.nodes(data=True):
-            node = n['data']
-            node_type = getattr(node, 'node_type', None)
-            entity_type = getattr(node, 'entity_type', '')
-            if node_type != 'entity' or entity_type.lower() != 'class':
-                continue
-            class_name = getattr(node, 'name', '')
-            # Check if this class has relationships indicating inheritance
-            # Look for 'inherits', 'extends', or similar relationships
-            for _, target, edge_data in g.out_edges(nid, data=True):
-                relation = edge_data.get('relation', '').lower()
-                target_node = g.nodes[target]['data']
-                target_name = getattr(target_node, 'name', '')
-                if relation in ('inherits', 'extends', 'inherits_from', 'base_class'):
-                    if target_name.lower() == base_lower or base_lower in target_name.lower():
-                        declaring_chunks = getattr(node, 'declaring_chunk_ids', [])
-                        inheriting_classes.append({
-                            'name': class_name,
-                            'id': nid,
-                            'base': target_name,
-                            'file': declaring_chunks[0] if declaring_chunks else 'Unknown'
-                        })
-                        break
-            # Also check called_entities for base class references
-            # (Sometimes inheritance is tracked via calls relationship)
-            called = getattr(node, 'called_entities', [])
-            if any(base_lower in str(c).lower() for c in called):
-                # Check if it's likely an inheritance pattern
-                declaring_chunks = getattr(node, 'declaring_chunk_ids', [])
-                if declaring_chunks:
-                    chunk_id = declaring_chunks[0]
-                    if chunk_id in g:
-                        chunk_node = g.nodes[chunk_id]['data']
-                        content = getattr(chunk_node, 'content', '')
-                        # Look for class definition with inheritance pattern
-                        class_pattern = rf'class\s+{re.escape(class_name)}\s*\([^)]*{re.escape(base_class_name)}'
-                        if re.search(class_pattern, content, re.IGNORECASE):
-                            if not any(c['name'] == class_name for c in inheriting_classes):
-                                inheriting_classes.append({
-                                    'name': class_name,
-                                    'id': nid,
-                                    'base': base_class_name,
-                                    'file': chunk_id
-                                })
-            if len(inheriting_classes) >= limit:
-                break
-        if not inheriting_classes:
-            return f"No classes found inheriting from '{base_class_name}'.\n\nTip: Try searching for the base class name in code content using search_nodes."
-        result = f"Classes inheriting from '{base_class_name}' ({len(inheriting_classes)} results):\n"
-        result += "━━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━\n\n"
-        for i, cls in enumerate(inheriting_classes, 1):
-            result += f"{i}. {cls['name']}\n"
-            result += f"   ID: {cls['id']}\n"
-            result += f"   Inherits from: {cls['base']}\n"
-            result += f"   Defined in: {cls['file']}\n\n"
         return result
     except Exception as e:
@@ -1576,7 +1644,7 @@ def find_classes_inheriting_from(base_class_name: str, limit: int = 20) -> str:
 @observe(as_type="tool")
-def find_files_importing(module_or_entity: str, limit: int = 30) -> str:
     """
     Retrieve all files that import a specific module or entity.
@@ -1584,7 +1652,8 @@ def find_files_importing(module_or_entity: str, limit: int = 30) -> str:
     Args:
         module_or_entity: The name of the module or entity to retrieve imports of
-        limit: Maximum number of results to return (default: 30)
     Returns:
         str: A formatted string with files that import the specified module/entity
@@ -1599,6 +1668,18 @@ def find_files_importing(module_or_entity: str, limit: int = 30) -> str:
                 limit = int(limit)
             except ValueError:
                 return f"Error: 'limit' must be an integer, got '{limit}'"
         g = knowledge_graph.graph
         importing_files = []
@@ -1654,9 +1735,6 @@ def find_files_importing(module_or_entity: str, limit: int = 30) -> str:
                                 'match_type': 'import_statement'
                             })
                         break
-            if len(importing_files) >= limit:
-                break
         # Sort by path
         importing_files.sort(key=lambda x: x['path'])
@@ -1664,16 +1742,30 @@ def find_files_importing(module_or_entity: str, limit: int = 30) -> str:
         if not importing_files:
             return f"No files found importing '{module_or_entity}'.\n\nTip: Try searching for the module name in code content using search_nodes."
-        result = f"Files importing '{module_or_entity}' ({len(importing_files)} results):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
-        for i, f in enumerate(importing_files, 1):
             result += f"{i}. {f['path']}\n"
             result += f"   Match type: {f['match_type']}\n"
             if f['matched_entities']:
                 result += f"   Matched: {', '.join(f['matched_entities'][:3])}\n"
             result += "\n"
         return result
     except Exception as e:
         return f"Error: {str(e)}"
@@ -1848,11 +1940,12 @@ def create_gradio_app():
             with gr.Row():
                 with gr.Column():
                     search_query = gr.Textbox(label="Search Query", placeholder="Enter search query...")
-                    search_limit = gr.Slider(1, 50, value=10, step=1, label="Max Results")
                     search_btn = gr.Button("Search", variant="primary")
                 with gr.Column():
                     search_output = gr.Textbox(label="Search Results", lines=20, max_lines=30)
-            search_btn.click(fn=search_nodes, inputs=[search_query, search_limit], outputs=search_output)
             gr.Markdown(_tool_doc_md(search_nodes))
         with gr.Tab("📝 Node Info"):
@@ -1937,11 +2030,12 @@ def create_gradio_app():
             with gr.Row():
                 with gr.Column():
                     entity_name_usage = gr.Textbox(label="Entity Name", placeholder="Enter entity name...")
-                    usage_limit = gr.Slider(1, 50, value=20, step=1, label="Max Results")
                     usage_btn = gr.Button("Find Usages", variant="primary")
                 with gr.Column():
                     usage_output = gr.Textbox(label="Usages", lines=15, max_lines=25)
-            usage_btn.click(fn=find_usages, inputs=[entity_name_usage, usage_limit], outputs=usage_output)
             gr.Markdown(_tool_doc_md(find_usages))
         with gr.Tab("🔬 Discovery"):
@@ -1971,11 +2065,11 @@ def create_gradio_app():
                     search_name = gr.Textbox(label="Name Contains", placeholder="Enter partial name...")
                     search_limit = gr.Slider(1, 100, value=10, step=1, label="Max Results")
                     search_page = gr.Slider(1, 100, value=1, step=1, label="Page")
-                    search_fuzzy = gr.Checkbox(label="Fuzzy Match", value=True)
                     search_type_btn = gr.Button("Search", variant="primary")
                 with gr.Column():
                     search_type_output = gr.Textbox(label="Results", lines=20, max_lines=30)
-            search_type_btn.click(fn=search_by_type_and_name, inputs=[search_type, search_name, search_limit, search_page, search_fuzzy], outputs=search_type_output)
             gr.Markdown(_tool_doc_md(search_by_type_and_name))
         with gr.Tab("🔗 Relationships"):
@@ -2008,10 +2102,12 @@ def create_gradio_app():
                 with gr.Column():
                     related_chunk_id = gr.Textbox(label="Chunk ID", placeholder="Enter chunk ID...")
                     relation_type = gr.Dropdown(choices=["" , "calls", "contains", "declares", "uses"], label="Relation Type", value="calls")
                     related_btn = gr.Button("Get Related Chunks", variant="primary")
                 with gr.Column():
                     related_output = gr.Textbox(label="Related Chunks", lines=20, max_lines=30)
-            related_btn.click(fn=get_related_chunks, inputs=[related_chunk_id, relation_type], outputs=related_output)
             gr.Markdown(_tool_doc_md(get_related_chunks))
             gr.Markdown("---")
@@ -2027,17 +2123,6 @@ def create_gradio_app():
             path_btn.click(fn=find_path, inputs=[path_source, path_target, path_depth], outputs=path_output)
             gr.Markdown(_tool_doc_md(find_path))
-            gr.Markdown("---")
-            gr.Markdown("### Find Classes Inheriting From")
-            with gr.Row():
-                with gr.Column():
-                    base_class_input = gr.Textbox(label="Base Class Name", placeholder="Enter base class...")
-                    inherit_btn = gr.Button("Find Subclasses", variant="primary")
-                with gr.Column():
-                    inherit_output = gr.Textbox(label="Inheriting Classes", lines=20, max_lines=30)
-            inherit_btn.click(fn=find_classes_inheriting_from, inputs=base_class_input, outputs=inherit_output)
-            gr.Markdown(_tool_doc_md(find_classes_inheriting_from))
         with gr.Tab("📖 Context"):
             gr.Markdown("### Get Chunk Context")
             with gr.Row():
@@ -2080,11 +2165,12 @@ def create_gradio_app():
                     dir_path = gr.Textbox(label="Directory Path (empty for root)", placeholder="e.g., src/")
                     file_pattern = gr.Textbox(label="Pattern", value="*", placeholder="e.g., *.py")
                     file_recursive = gr.Checkbox(label="Recursive", value=True)
-                    file_limit = gr.Slider(10, 100, value=50, step=10, label="Max Results")
                     list_files_btn = gr.Button("List Files", variant="primary")
                 with gr.Column():
                     list_files_output = gr.Textbox(label="Files", lines=20, max_lines=30)
-            list_files_btn.click(fn=list_files_in_directory, inputs=[dir_path, file_pattern, file_recursive, file_limit], outputs=list_files_output)
             gr.Markdown(_tool_doc_md(list_files_in_directory))
             gr.Markdown("---")
@@ -2092,11 +2178,12 @@ def create_gradio_app():
             with gr.Row():
                 with gr.Column():
                     import_module = gr.Textbox(label="Module/Entity Name", placeholder="e.g., torch, numpy...")
-                    import_limit = gr.Slider(10, 50, value=30, step=5, label="Max Results")
                     find_imports_btn = gr.Button("Find Files", variant="primary")
                 with gr.Column():
                     find_imports_output = gr.Textbox(label="Importing Files", lines=20, max_lines=30)
-            find_imports_btn.click(fn=find_files_importing, inputs=[import_module, import_limit], outputs=find_imports_output)
             gr.Markdown(_tool_doc_md(find_files_importing))
             gr.Markdown("---")

 @observe(as_type="tool")
+def search_nodes(query: str, limit: int = 10, page: int = 1) -> str:
     """
     Search for chunk nodes in the knowledge graph by query string.
     Args:
         query: The search string to match against code index
+        limit: Maximum number of results to return per page (default: 10)
+        page: Page number for pagination, 1-indexed (default: 1)
     Returns:
         str: A formatted string with search results
             except ValueError:
                 return f"Error: 'limit' must be an integer, got '{limit}'"
+        # Convert page to int if it's a string
+        if isinstance(page, str):
+            try:
+                page = int(page)
+            except ValueError:
+                return f"Error: 'page' must be an integer, got '{page}'"
         if limit <= 0:
             return "Error: limit must be a positive integer"
+        if page < 1:
+            return "Error: 'page' must be a positive integer (1 or greater)"
+        # Fetch more results to support pagination
+        max_fetch = limit * page
+        results = knowledge_graph.code_index.query(query, n_results=max_fetch)
         metadatas = results.get("metadatas", [[]])[0]
         if not metadatas:
             return f"No results found for '{query}'."
+        total = len(metadatas)
+        # Pagination
+        total_pages = (total + limit - 1) // limit
+        if page > total_pages:
+            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} results at {limit} per page)"
+        start_idx = (page - 1) * limit
+        end_idx = start_idx + limit
+        page_slice = metadatas[start_idx:end_idx]
+        result = f"Search Results for '{query}' (Page {page}/{total_pages}, {total} total):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+        for i, res in enumerate(page_slice, start=start_idx + 1):
             result += f"{i}. ID: {res.get('id', 'N/A')}\n"
             content = res.get('content', '')
             if content:
                 result += f"   Content: {content}\n"
             result += "\n"
+        # Pagination hint
+        if page < total_pages:
+            result += f"Use page={page + 1} to see the next page\n"
         return result
     except Exception as e:
         return f"Error: {str(e)}"
 @observe(as_type="tool")
 def get_graph_stats() -> str:
     """
+    Get comprehensive statistics about the knowledge graph.
+    Returns detailed information about the repository structure including:
+    - Chunks: Code segments that represent portions of files (functions, classes, etc.)
+    - Entities: Programming constructs like classes, functions, methods, variables
+    - Files and directories in the repository
+    - Relationships between different components
+    For entity nodes, provides a breakdown by entity type (class, function, method, etc.).
     Returns:
+        str: A formatted string with comprehensive graph statistics
     """
     if knowledge_graph is None:
         return "Error: Knowledge graph not initialized"
         num_nodes = g.number_of_nodes()
         num_edges = g.number_of_edges()
+        # Count node types
         node_types = {}
+        entity_breakdown = {}
         for _, node_attrs in g.nodes(data=True):
             node_type = getattr(node_attrs['data'], 'node_type', 'Unknown')
             node_types[node_type] = node_types.get(node_type, 0) + 1
+            # For entity nodes, get entity_type breakdown
+            if node_type == 'entity':
+                entity_type = getattr(node_attrs['data'], 'entity_type', 'Unknown')
+                # Fallback: if entity_type is empty, check entities dictionary
+                if not entity_type:
+                    node_id = node_attrs['data'].id if hasattr(node_attrs['data'], 'id') else None
+                    if node_id and node_id in knowledge_graph.entities:
+                        entity_types = knowledge_graph.entities[node_id].get('type', [])
+                        entity_type = entity_types[0] if entity_types else 'Unknown'
+                entity_breakdown[entity_type] = entity_breakdown.get(entity_type, 0) + 1
+        # Count edge relations
         edge_relations = {}
         for _, _, attrs in g.edges(data=True):
             relation = attrs.get('relation', 'Unknown')
             edge_relations[relation] = edge_relations.get(relation, 0) + 1
+        # Build result
         result = f"""Knowledge Graph Statistics:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+📊 Overview:
+  Total Nodes: {num_nodes:,}
+  Total Edges: {num_edges:,}
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+📦 Node Types:
 """
+        # Sort node types by count
         for ntype, count in sorted(node_types.items(), key=lambda x: x[1], reverse=True):
+            result += f"  • {ntype}: {count:,}\n"
+            # If this is entity type, show breakdown
+            if ntype == 'entity' and entity_breakdown:
+                result += f"    └─ Entity Breakdown:\n"
+                for etype, ecount in sorted(entity_breakdown.items(), key=lambda x: x[1], reverse=True):
+                    percentage = (ecount / count * 100) if count > 0 else 0
+                    result += f"       ├─ {etype}: {ecount:,} ({percentage:.1f}%)\n"
+        result += f"""
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+🔗 Edge Relations:
+"""
         for relation, count in sorted(edge_relations.items(), key=lambda x: x[1], reverse=True):
+            result += f"  • {relation}: {count:,}\n"
+        # Add explanation section
+        result += f"""
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ℹ️  Definitions:
+Chunks: Code segments representing logical portions of files. Each chunk
+        contains a section of code (like a function, class, or code block)
+        along with metadata about what entities it declares and calls.
+Entities: Programming constructs identified in the code including:
+          - Classes: Class definitions
+          - Functions: Function definitions
+          - Methods: Class method definitions
+          - Variables: Variable declarations
+          - Parameters: Function/method parameters
+          - Function_call/Method_call: Usage references
+Files: Source code files in the repository
+Directories: Folder structure containing files
+Repo: Root repository node
+Edge Relations:
+  - contains: Parent-child relationships (file contains chunks)
+  - declares: Entity declaration relationships
+  - calls: Entity usage/invocation relationships
+"""
         return result
     except Exception as e:
 @observe(as_type="tool")
+def find_usages(entity_name: str, limit: int = 20, page: int = 1) -> str:
     """
     Retrieve all usages or calls of an entity in the codebase.
     Args:
         entity_name: The name of the entity to retrieve usages for
+        limit: Maximum number of usages to return per page (default: 20)
+        page: Page number for pagination, 1-indexed (default: 1)
     Returns:
         str: A formatted string with usage locations
             except ValueError:
                 return f"Error: 'limit' must be an integer, got '{limit}'"
+        # Convert page to int if it's a string
+        if isinstance(page, str):
+            try:
+                page = int(page)
+            except ValueError:
+                return f"Error: 'page' must be an integer, got '{page}'"
         if entity_name not in knowledge_graph.entities:
             return f"Error: Entity '{entity_name}' not found in knowledge graph"
         if limit <= 0:
             return "Error: limit must be a positive integer"
+        if page < 1:
+            return "Error: 'page' must be a positive integer (1 or greater)"
         entity_info = knowledge_graph.entities[entity_name]
         calling_chunks = entity_info.get('calling_chunk_ids', [])
         if not calling_chunks:
             return f"Entity '{entity_name}' found but no usages identified."
+        total = len(calling_chunks)
+        # Pagination
+        total_pages = (total + limit - 1) // limit
+        if page > total_pages:
+            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} usages at {limit} per page)"
+        start_idx = (page - 1) * limit
+        end_idx = start_idx + limit
+        page_slice = calling_chunks[start_idx:end_idx]
+        result = f"Usages of '{entity_name}' (Page {page}/{total_pages}, {total} total):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+        for i, chunk_id in enumerate(page_slice, start=start_idx + 1):
             if chunk_id in knowledge_graph.graph:
                 chunk = knowledge_graph.graph.nodes[chunk_id]['data']
                 result += f"{i}. {chunk.path} (chunk {chunk.order_in_file})\n"
                 result += f"   Content:\n{chunk.content}\n\n"
+        # Pagination hint
+        if page < total_pages:
+            result += f"Use page={page + 1} to see the next page\n"
         return result
     except Exception as e:
 @observe(as_type="tool")
+def get_related_chunks(chunk_id: str, relation_type: str = "calls", limit: int = 20, page: int = 1) -> str:
     """
     Retrieve chunks related to a given chunk by a specific relationship.
     Args:
         chunk_id: The ID of the chunk to retrieve related chunks for
         relation_type: The type of relationship to filter by (default: 'calls')
+        limit: Maximum number of results per page (default: 20)
+        page: Page number for pagination, 1-indexed (default: 1)
     Returns:
         str: A formatted string with related chunks
         if chunk_id not in knowledge_graph.graph:
             return f"Error: Chunk '{chunk_id}' not found in knowledge graph"
+        # Convert limit/page to int if they're strings
+        if isinstance(limit, str):
+            try:
+                limit = int(limit)
+            except ValueError:
+                return f"Error: 'limit' must be an integer, got '{limit}'"
+        if isinstance(page, str):
+            try:
+                page = int(page)
+            except ValueError:
+                return f"Error: 'page' must be an integer, got '{page}'"
+        if limit <= 0:
+            return "Error: limit must be a positive integer"
+        if page < 1:
+            return "Error: 'page' must be a positive integer (1 or greater)"
         related = []
         if relation_type == "" or relation_type == "all":
             # Get all outgoing edges regardless of relation type
         if not related:
             return f"No chunks found with '{relation_type}' relationship from '{chunk_id}'"
+        total = len(related)
+        # Pagination
+        total_pages = (total + limit - 1) // limit
+        if page > total_pages:
+            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} results at {limit} per page)"
+        start_idx = (page - 1) * limit
+        end_idx = start_idx + limit
+        page_slice = related[start_idx:end_idx]
+        result = f"Chunks related to '{chunk_id}' via '{relation_type}' (Page {page}/{total_pages}, {total} total):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+        for i, chunk in enumerate(page_slice, start=start_idx + 1):
             result += f"{i}. {chunk['id']}\n"
             result += f"   File: {chunk['file_path']}\n"
             if chunk['entity_name']:
                 result += f"   Entity: {chunk['entity_name']}\n"
             result += "\n"
+        # Pagination hint
+        if page < total_pages:
+            result += f"Use page={page + 1} to see the next page\n"
         return result
     except Exception as e:
             except ValueError:
                 return f"Error: 'page' must be an integer, got '{page}'"
+        # Convert partial_allowed to bool if it's a string
+        if isinstance(partial_allowed, str):
+            partial_allowed = partial_allowed.lower() in ('true', '1', 'yes')
         if limit <= 0:
             return "Error: limit must be a positive integer"
         matches = []
         query_lower = name_query.lower()
+        # Build regex pattern for partial_allowed matching
         # This will match names containing all characters of the query in order
+        if partial_allowed:
             # Create pattern that matches query as substring or with characters spread out
             # e.g., "Embed" matches "Embedding", "BertEmbeddings", "EmbedLayer"
+            partial_pattern = '.*'.join(re.escape(c) for c in query_lower)
+            partial_regex = re.compile(partial_pattern, re.IGNORECASE)
         for nid, n in g.nodes(data=True):
             node = n['data']
             # Check if name matches the query
             name_matches = False
+            if partial_allowed:
+                # Partial match: substring match OR regex pattern match
+                if query_lower in node_name.lower() or partial_regex.search(node_name):
                     name_matches = True
             else:
                 # Exact substring match
 @observe(as_type="tool")
+def list_files_in_directory(directory_path: str = "", pattern: str = "*", recursive: bool = True, limit: int = 50, page: int = 1) -> str:
     """
     List files in a directory with optional glob pattern matching.
         directory_path: Path to the directory to list (empty string for root/all files)
         pattern: Glob pattern to filter files (e.g., '*.py', 'test_*.py', '**/*.js')
         recursive: Whether to search recursively in subdirectories (default: True)
+        limit: Maximum number of files to return per page (default: 50)
+        page: Page number for pagination, 1-indexed (default: 1)
     Returns:
         str: A formatted string with matching files
             except ValueError:
                 return f"Error: 'limit' must be an integer, got '{limit}'"
+        # Convert page to int if it's a string
+        if isinstance(page, str):
+            try:
+                page = int(page)
+            except ValueError:
+                return f"Error: 'page' must be an integer, got '{page}'"
+        if limit <= 0:
+            return "Error: limit must be a positive integer"
+        if page < 1:
+            return "Error: 'page' must be a positive integer (1 or greater)"
         # Convert recursive to bool if it's a string
         if isinstance(recursive, str):
             recursive = recursive.lower() in ('true', '1', 'yes')
                 'language': language,
                 'entity_count': len(declared_entities)
             })
         # Sort by path for consistent ordering
         matching_files.sort(key=lambda x: x['path'])
             pattern_desc = f" matching '{pattern}'" if pattern and pattern != '*' else ""
             return f"No files found{filter_desc}{pattern_desc}."
+        total = len(matching_files)
+        # Pagination
+        total_pages = (total + limit - 1) // limit
+        if page > total_pages:
+            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} files at {limit} per page)"
+        start_idx = (page - 1) * limit
+        end_idx = start_idx + limit
+        page_slice = matching_files[start_idx:end_idx]
         result = f"Files"
         if directory_path:
             result += f" in '{directory_path}'"
         if pattern and pattern != '*':
             result += f" matching '{pattern}'"
+        result += f" (Page {page}/{total_pages}, {total} total):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+        for i, f in enumerate(page_slice, start=start_idx + 1):
             result += f"{i}. {f['path']}\n"
             result += f"   Language: {f['language']}, Entities: {f['entity_count']}\n\n"
+        # Pagination hint
+        if page < total_pages:
+            result += f"Use page={page + 1} to see the next page\n"
         return result
     except Exception as e:
 @observe(as_type="tool")
+def find_files_importing(module_or_entity: str, limit: int = 30, page: int = 1) -> str:
     """
     Retrieve all files that import a specific module or entity.
     Args:
         module_or_entity: The name of the module or entity to retrieve imports of
+        limit: Maximum number of results to return per page (default: 30)
+        page: Page number for pagination, 1-indexed (default: 1)
     Returns:
         str: A formatted string with files that import the specified module/entity
                 limit = int(limit)
             except ValueError:
                 return f"Error: 'limit' must be an integer, got '{limit}'"
+        # Convert page to int if it's a string
+        if isinstance(page, str):
+            try:
+                page = int(page)
+            except ValueError:
+                return f"Error: 'page' must be an integer, got '{page}'"
+        if limit <= 0:
+            return "Error: limit must be a positive integer"
+        if page < 1:
+            return "Error: 'page' must be a positive integer (1 or greater)"
         g = knowledge_graph.graph
         importing_files = []
                                 'match_type': 'import_statement'
                             })
                         break
         # Sort by path
         importing_files.sort(key=lambda x: x['path'])
         if not importing_files:
             return f"No files found importing '{module_or_entity}'.\n\nTip: Try searching for the module name in code content using search_nodes."
+        total = len(importing_files)
+        # Pagination
+        total_pages = (total + limit - 1) // limit
+        if page > total_pages:
+            return f"Error: Page {page} does not exist. Total pages: {total_pages} (with {total} files at {limit} per page)"
+        start_idx = (page - 1) * limit
+        end_idx = start_idx + limit
+        page_slice = importing_files[start_idx:end_idx]
+        result = f"Files importing '{module_or_entity}' (Page {page}/{total_pages}, {total} total):\n"
         result += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
+        for i, f in enumerate(page_slice, start=start_idx + 1):
             result += f"{i}. {f['path']}\n"
             result += f"   Match type: {f['match_type']}\n"
             if f['matched_entities']:
                 result += f"   Matched: {', '.join(f['matched_entities'][:3])}\n"
             result += "\n"
+        # Pagination hint
+        if page < total_pages:
+            result += f"Use page={page + 1} to see the next page\n"
         return result
     except Exception as e:
         return f"Error: {str(e)}"
             with gr.Row():
                 with gr.Column():
                     search_query = gr.Textbox(label="Search Query", placeholder="Enter search query...")
+                    search_limit = gr.Slider(1, 50, value=10, step=1, label="Results per Page")
+                    search_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                     search_btn = gr.Button("Search", variant="primary")
                 with gr.Column():
                     search_output = gr.Textbox(label="Search Results", lines=20, max_lines=30)
+            search_btn.click(fn=search_nodes, inputs=[search_query, search_limit, search_page], outputs=search_output)
             gr.Markdown(_tool_doc_md(search_nodes))
         with gr.Tab("📝 Node Info"):
             with gr.Row():
                 with gr.Column():
                     entity_name_usage = gr.Textbox(label="Entity Name", placeholder="Enter entity name...")
+                    usage_limit = gr.Slider(1, 50, value=20, step=1, label="Results per Page")
+                    usage_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                     usage_btn = gr.Button("Find Usages", variant="primary")
                 with gr.Column():
                     usage_output = gr.Textbox(label="Usages", lines=15, max_lines=25)
+            usage_btn.click(fn=find_usages, inputs=[entity_name_usage, usage_limit, usage_page], outputs=usage_output)
             gr.Markdown(_tool_doc_md(find_usages))
         with gr.Tab("🔬 Discovery"):
                     search_name = gr.Textbox(label="Name Contains", placeholder="Enter partial name...")
                     search_limit = gr.Slider(1, 100, value=10, step=1, label="Max Results")
                     search_page = gr.Slider(1, 100, value=1, step=1, label="Page")
+                    search_partial_allowed = gr.Checkbox(label="Partial Match", value=True)
                     search_type_btn = gr.Button("Search", variant="primary")
                 with gr.Column():
                     search_type_output = gr.Textbox(label="Results", lines=20, max_lines=30)
+            search_type_btn.click(fn=search_by_type_and_name, inputs=[search_type, search_name, search_limit, search_page, search_partial_allowed], outputs=search_type_output)
             gr.Markdown(_tool_doc_md(search_by_type_and_name))
         with gr.Tab("🔗 Relationships"):
                 with gr.Column():
                     related_chunk_id = gr.Textbox(label="Chunk ID", placeholder="Enter chunk ID...")
                     relation_type = gr.Dropdown(choices=["" , "calls", "contains", "declares", "uses"], label="Relation Type", value="calls")
+                    related_limit = gr.Slider(1, 100, value=20, step=1, label="Results per Page")
+                    related_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                     related_btn = gr.Button("Get Related Chunks", variant="primary")
                 with gr.Column():
                     related_output = gr.Textbox(label="Related Chunks", lines=20, max_lines=30)
+            related_btn.click(fn=get_related_chunks, inputs=[related_chunk_id, relation_type, related_limit, related_page], outputs=related_output)
             gr.Markdown(_tool_doc_md(get_related_chunks))
             gr.Markdown("---")
             path_btn.click(fn=find_path, inputs=[path_source, path_target, path_depth], outputs=path_output)
             gr.Markdown(_tool_doc_md(find_path))
         with gr.Tab("📖 Context"):
             gr.Markdown("### Get Chunk Context")
             with gr.Row():
                     dir_path = gr.Textbox(label="Directory Path (empty for root)", placeholder="e.g., src/")
                     file_pattern = gr.Textbox(label="Pattern", value="*", placeholder="e.g., *.py")
                     file_recursive = gr.Checkbox(label="Recursive", value=True)
+                    file_limit = gr.Slider(10, 100, value=50, step=10, label="Results per Page")
+                    file_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                     list_files_btn = gr.Button("List Files", variant="primary")
                 with gr.Column():
                     list_files_output = gr.Textbox(label="Files", lines=20, max_lines=30)
+            list_files_btn.click(fn=list_files_in_directory, inputs=[dir_path, file_pattern, file_recursive, file_limit, file_page], outputs=list_files_output)
             gr.Markdown(_tool_doc_md(list_files_in_directory))
             gr.Markdown("---")
             with gr.Row():
                 with gr.Column():
                     import_module = gr.Textbox(label="Module/Entity Name", placeholder="e.g., torch, numpy...")
+                    import_limit = gr.Slider(10, 50, value=30, step=5, label="Results per Page")
+                    import_page = gr.Slider(1, 100, value=1, step=1, label="Page")
                     find_imports_btn = gr.Button("Find Files", variant="primary")
                 with gr.Column():
                     find_imports_output = gr.Textbox(label="Importing Files", lines=20, max_lines=30)
+            find_imports_btn.click(fn=find_files_importing, inputs=[import_module, import_limit, import_page], outputs=find_imports_output)
             gr.Markdown(_tool_doc_md(find_files_importing))
             gr.Markdown("---")