# ai-codebase-intelligence / memory/code_indexer.py
# Hosting-page header: pkraman06 - "Create code_indexer.py" - commit 8dbf12b (verified), 856 bytes.
import os
from langchain.docstore.document import Document
from memory.vector_store import create_vector_store
# File-name suffixes that load_code_files accepts when scanning a repository.
SUPPORTED_EXTENSIONS = [
    ".py",
    ".js",
    ".ts",
    ".java",
    ".cpp",
]
def load_code_files(repo_path, *, extensions=None):
    """Read every matching source file under *repo_path* into a Document.

    The directory tree is walked recursively with ``os.walk``. Each file whose
    name ends with one of the accepted suffixes is read as text and wrapped in
    a ``Document`` whose ``page_content`` is the file text and whose
    ``metadata`` is ``{"source": <path to the file>}``.

    Args:
        repo_path: Root directory to scan.
        extensions: Optional suffix (``str``) or iterable of suffixes to
            accept. When omitted, the module-level ``SUPPORTED_EXTENSIONS``
            list is used.

    Returns:
        A list of ``Document`` objects, ordered by sorted directory and file
        name so that repeated runs over the same tree give the same order.
        The list is empty when nothing matches; ``os.walk`` ignores listing
        errors by default, so a missing *repo_path* also yields an empty list.

    Raises:
        OSError: If a matching file cannot be opened or read.
    """
    if extensions is None:
        extensions = SUPPORTED_EXTENSIONS
    # A bare string would be split into characters by tuple(); wrap it so
    # extensions=".py" means "one suffix", not ('.', 'p', 'y').
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple of suffixes; build it once, not per file.
    suffixes = tuple(extensions)

    documents = []
    for root, dirs, files in os.walk(repo_path):
        # Sorting dirs in place fixes the order os.walk descends in; without
        # it the traversal order depends on the filesystem.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(suffixes):
                continue
            path = os.path.join(root, file)
            # errors="replace": one file that is not valid UTF-8 must not
            # abort indexing of the whole repository with UnicodeDecodeError;
            # undecodable bytes become U+FFFD instead.
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
            documents.append(
                Document(
                    page_content=content,
                    metadata={"source": path},
                )
            )
    return documents
def index_codebase(repo_path):
    """Load the code files under *repo_path* and build a vector store from them.

    The documents produced by ``load_code_files(repo_path)`` are handed
    straight to ``create_vector_store``; whatever that call returns is
    returned unchanged.
    """
    return create_vector_store(load_code_files(repo_path))