File size: 1,692 Bytes
3bc9c63
58336be
10bcf8a
 
3bc9c63
 
 
10bcf8a
58336be
10bcf8a
3bc9c63
58336be
 
 
 
 
 
10bcf8a
 
 
 
 
 
 
 
 
 
58336be
3bc9c63
 
 
 
10bcf8a
 
 
 
 
 
58336be
10bcf8a
58336be
10bcf8a
58336be
 
3bc9c63
 
10bcf8a
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import logging
import os
import shutil
import zipfile

import nbformat
from git import Repo

from chunker import chunk_code

# File-name suffixes that ingest_repo reads as text and hands to chunk_code.
# (.ipynb notebooks are handled separately by parse_notebook.)
SUPPORTED_EXTENSIONS = (".py", ".js", ".java", ".cpp", ".txt")
# Directory load_repo clones into; it is deleted and recreated on every call.
BASE_REPO_DIR = "/tmp/user_repo"
# Directory extract_zip unpacks into; it is deleted and recreated on every call.
BASE_ZIP_DIR = "/tmp/user_zip"

def load_repo(repo_url: str) -> str:
    """Clone *repo_url* into a fresh ``BASE_REPO_DIR`` and return that path.

    Whatever a previous call left at ``BASE_REPO_DIR`` is removed first, so
    only one checkout exists at a time.
    """
    destination = BASE_REPO_DIR
    leftover_present = os.path.exists(destination)
    if leftover_present:
        # Drop the previous checkout so the clone starts from a clean path.
        shutil.rmtree(destination)
    Repo.clone_from(repo_url, destination)
    return destination

def extract_zip(zip_file) -> str:
    """Unpack *zip_file* into a fresh ``BASE_ZIP_DIR`` and return that path.

    *zip_file* is passed straight to ``zipfile.ZipFile``. Anything a previous
    call left at ``BASE_ZIP_DIR`` is removed before extraction.
    """
    out_dir = BASE_ZIP_DIR
    already_there = os.path.exists(out_dir)
    if already_there:
        shutil.rmtree(out_dir)
    os.makedirs(out_dir, exist_ok=True)

    archive = zipfile.ZipFile(zip_file, "r")
    with archive:
        archive.extractall(out_dir)

    return out_dir

def ingest_repo(repo_path: str) -> list:
    """Walk *repo_path* and collect the chunks produced for every supported file.

    ``.ipynb`` files are routed through ``parse_notebook``; files whose name
    ends with one of ``SUPPORTED_EXTENSIONS`` are read as UTF-8 text and
    passed to ``chunk_code``. All other files are ignored, and ``.git``
    directories are not traversed.

    Ingestion is best-effort: a file that cannot be read or chunked is logged
    and skipped rather than aborting the whole walk.

    Args:
        repo_path: Root directory to scan recursively.

    Returns:
        A flat list of the items returned by ``chunk_code`` and
        ``parse_notebook``, in ``os.walk`` order. Empty when nothing matches
        or when *repo_path* does not exist.
    """
    logger = logging.getLogger(__name__)
    documents = []

    for root, dirs, files in os.walk(repo_path):
        # Prune Git metadata in place so os.walk never descends into it.
        if ".git" in dirs:
            dirs.remove(".git")

        for file in files:
            path = os.path.join(root, file)

            if file.endswith(".ipynb"):
                documents.extend(parse_notebook(path))
                continue

            if not file.endswith(SUPPORTED_EXTENSIONS):
                continue

            try:
                # Explicit UTF-8 keeps decoding independent of the host
                # locale; undecodable bytes are dropped, as before.
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    code = f.read()
                documents.extend(chunk_code(path, code))
            except Exception:
                # Keep going on failure, but leave a trace instead of
                # swallowing the error silently.
                logger.warning(
                    "Skipping %s: could not read or chunk file",
                    path,
                    exc_info=True,
                )

    return documents

def parse_notebook(file_path: str, min_chars: int = 100) -> list:
    """Extract the code cells of a Jupyter notebook and chunk them.

    The sources of all ``code`` cells are joined with blank lines and passed
    to ``chunk_code`` as one text. Notebooks whose combined code (after
    stripping surrounding whitespace) is not longer than *min_chars* are
    skipped.

    Parsing is best-effort: any failure while reading or chunking the
    notebook is logged and results in an empty list.

    Args:
        file_path: Path to the ``.ipynb`` file.
        min_chars: Combined code must be strictly longer than this many
            characters to be chunked. Defaults to 100.

    Returns:
        The items produced by ``chunk_code`` as a list, or ``[]`` when the
        notebook is too small or could not be processed.
    """
    docs = []
    try:
        nb = nbformat.read(file_path, as_version=4)
        code_cells = [
            cell.source for cell in nb.cells if cell.cell_type == "code"
        ]
        combined = "\n\n".join(code_cells)
        if len(combined.strip()) > min_chars:
            docs.extend(chunk_code(file_path, combined))
    except Exception:
        # One broken notebook must not abort ingestion, but the failure
        # should be visible in the logs rather than silently dropped.
        logging.getLogger(__name__).warning(
            "Skipping notebook %s: could not parse or chunk it",
            file_path,
            exc_info=True,
        )
    return docs