code-crawler / code_chatbot /code_symbols.py
Asish Karthikeya Gogineni
Refactor: Upgraded to Agentic Chatbot with AST & Call Graph support
5b89d45
raw
history blame
3.29 kB
"""Utilities to extract code symbols (class and method names) from code files."""
import logging
from typing import List, Tuple, Optional
from tree_sitter import Node
from code_chatbot.chunker import StructuralChunker
logger = logging.getLogger(__name__)
def _extract_classes_and_methods(node: Node, acc: List[Tuple[Optional[str], Optional[str]]], parent_class: Optional[str] = None, content: str = ""):
"""Extracts classes and methods from a tree-sitter node and places them in the `acc` accumulator.
Args:
node: The tree-sitter node to traverse
acc: Accumulator list to store (class_name, method_name) tuples
parent_class: Name of the parent class (if any)
content: The file content as string (for extracting names)
"""
if node.type in ["class_definition", "class_declaration"]:
class_name_node = node.child_by_field_name("name")
if class_name_node:
class_name = content[class_name_node.start_byte:class_name_node.end_byte]
if class_name:
acc.append((class_name, None))
# Recursively process children with this class as parent
for child in node.children:
_extract_classes_and_methods(child, acc, class_name, content)
return
elif node.type in ["function_definition", "method_definition"]:
function_name_node = node.child_by_field_name("name")
if function_name_node:
method_name = content[function_name_node.start_byte:function_name_node.end_byte]
if method_name:
acc.append((parent_class, method_name))
# Don't go deeper into method bodies (we're not extracting nested functions)
return
else:
# Recursively process children
for child in node.children:
_extract_classes_and_methods(child, acc, parent_class, content)
def get_code_symbols(file_path: str, content: str) -> List[Tuple[Optional[str], Optional[str]]]:
"""Extracts code symbols from a file.
Code symbols are tuples of the form (class_name, method_name).
For classes, method_name is None.
For methods that do not belong to a class, class_name is None.
Args:
file_path: Path to the file
content: Content of the file as a string
Returns:
List of (class_name, method_name) tuples
"""
if not StructuralChunker.is_code_file(file_path):
return []
if not content:
return []
logger.debug(f"Extracting code symbols from {file_path}")
# Try to parse the file using the chunker's parsing logic
try:
ext = file_path.split('.')[-1].lower()
chunker = StructuralChunker()
if ext not in chunker.parsers:
return []
parser = chunker.parsers[ext]
tree = parser.parse(bytes(content, "utf8"))
if not tree or not tree.root_node.children:
return []
classes_and_methods = []
_extract_classes_and_methods(tree.root_node, classes_and_methods, None, content)
return classes_and_methods
except Exception as e:
logger.warning(f"Failed to extract code symbols from {file_path}: {e}")
return []