| import os | |
| from langchain.docstore.document import Document | |
| from memory.vector_store import create_vector_store | |
# File-name suffixes that load_code_files treats as source code worth loading.
SUPPORTED_EXTENSIONS = [
    ".py",
    ".js",
    ".ts",
    ".java",
    ".cpp",
]
def load_code_files(repo_path):
    """Load every supported source file under *repo_path* as a ``Document``.

    The directory tree rooted at ``repo_path`` is walked with ``os.walk`` and
    each file whose name ends with one of ``SUPPORTED_EXTENSIONS`` is read as
    UTF-8 text.

    Args:
        repo_path: Root directory to scan.

    Returns:
        A list of ``Document`` objects, one per matching file, in sorted
        traversal order. ``page_content`` holds the file text and
        ``metadata["source"]`` holds the path built from the walk root and
        the file name.

    Raises:
        OSError: If a matching file cannot be opened or read.
    """
    # str.endswith accepts a tuple of suffixes, so build it once up front
    # instead of looping over the extensions for every file name.
    extensions = tuple(SUPPORTED_EXTENSIONS)

    documents = []
    for root, dirs, files in os.walk(repo_path):
        # Sorting in place steers os.walk's top-down traversal, so the
        # resulting document order is the same on every run and platform.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(extensions):
                continue
            path = os.path.join(root, file)
            # errors="replace" keeps one file with stray non-UTF-8 bytes from
            # aborting the whole scan with UnicodeDecodeError; undecodable
            # bytes become U+FFFD in the loaded text.
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
            documents.append(
                Document(
                    page_content=content,
                    metadata={"source": path},
                )
            )
    return documents
def index_codebase(repo_path):
    """Build a vector store from the code files found under *repo_path*.

    The documents produced by ``load_code_files(repo_path)`` are passed
    straight to ``create_vector_store``, and its result is returned unchanged.
    """
    return create_vector_store(load_code_files(repo_path))