File size: 856 Bytes
8dbf12b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import logging
import os

from langchain.docstore.document import Document

from memory.vector_store import create_vector_store


# File-name suffixes that load_code_files treats as source code to load.
SUPPORTED_EXTENSIONS = [
    ".py",
    ".js",
    ".ts",
    ".java",
    ".cpp",
]


def load_code_files(repo_path):
    """Collect the source files under *repo_path* as ``Document`` objects.

    Walks ``repo_path`` recursively with ``os.walk`` and reads every file
    whose name ends with one of ``SUPPORTED_EXTENSIONS`` as UTF-8 text.

    Args:
        repo_path: Root directory to scan.

    Returns:
        A list of ``Document`` objects, one per file that was read. Each has
        the file's text as ``page_content`` and ``{"source": <path>}`` as
        ``metadata``. Files that cannot be opened or decoded as UTF-8 are
        skipped and reported with a logged warning.
    """
    log = logging.getLogger(__name__)
    # str.endswith accepts a tuple of suffixes; build it once, outside the loops.
    extensions = tuple(SUPPORTED_EXTENSIONS)
    documents = []

    for root, _, files in os.walk(repo_path):
        for file in files:
            if not file.endswith(extensions):
                continue

            path = os.path.join(root, file)
            try:
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as exc:
                # One unreadable or non-UTF-8 file must not abort the whole
                # walk and throw away the documents already collected.
                log.warning("Skipping %s: %s", path, exc)
                continue

            documents.append(
                Document(
                    page_content=content,
                    metadata={"source": path},
                )
            )

    return documents


def index_codebase(repo_path):
    """Load the code files under *repo_path* and build a vector store from them.

    Args:
        repo_path: Root directory passed to ``load_code_files``.

    Returns:
        Whatever ``create_vector_store`` returns for the loaded documents.
    """
    return create_vector_store(load_code_files(repo_path))