File size: 1,692 Bytes
3bc9c63 58336be 10bcf8a 3bc9c63 10bcf8a 58336be 10bcf8a 3bc9c63 58336be 10bcf8a 58336be 3bc9c63 10bcf8a 58336be 10bcf8a 58336be 10bcf8a 58336be 3bc9c63 10bcf8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import logging
import os
import shutil
import zipfile

import nbformat
from git import Repo

from chunker import chunk_code
# File-name suffixes that ingest_repo reads as text and hands to chunk_code.
SUPPORTED_EXTENSIONS = (
    ".py",
    ".js",
    ".java",
    ".cpp",
    ".txt",
)

# Fixed working directories: load_repo clones into the first one,
# extract_zip unpacks into the second one.
BASE_REPO_DIR = "/tmp/user_repo"
BASE_ZIP_DIR = "/tmp/user_zip"
def load_repo(repo_url: str) -> str:
    """Clone *repo_url* into ``BASE_REPO_DIR`` and return that directory.

    Whatever already exists at ``BASE_REPO_DIR`` is removed before cloning,
    so the directory only ever holds the most recently cloned repository.

    Args:
        repo_url: Location passed straight to ``Repo.clone_from``.

    Returns:
        ``BASE_REPO_DIR``, the path the repository was cloned into.
    """
    checkout_dir = BASE_REPO_DIR

    # Remove the previous checkout so the clone starts from a clean slate.
    has_stale_checkout = os.path.exists(checkout_dir)
    if has_stale_checkout:
        shutil.rmtree(checkout_dir)

    Repo.clone_from(repo_url, checkout_dir)
    return checkout_dir
def extract_zip(zip_file) -> str:
    """Unpack *zip_file* into a freshly created ``BASE_ZIP_DIR``.

    Anything already present at ``BASE_ZIP_DIR`` is deleted first, then the
    directory is recreated and the whole archive is extracted into it.

    Args:
        zip_file: Whatever ``zipfile.ZipFile`` accepts as its first argument
            (the value is forwarded unchanged).

    Returns:
        ``BASE_ZIP_DIR``, the directory holding the extracted contents.
    """
    destination = BASE_ZIP_DIR

    # Start from an empty directory so files from an earlier upload
    # cannot leak into this extraction.
    if os.path.exists(destination):
        shutil.rmtree(destination)
    os.makedirs(destination, exist_ok=True)

    with zipfile.ZipFile(zip_file, mode="r") as archive:
        archive.extractall(path=destination)

    return destination
def ingest_repo(repo_path: str):
    """Walk *repo_path* and collect chunked documents from supported files.

    Files ending in ``.ipynb`` are delegated to ``parse_notebook``. Files
    whose name ends with one of ``SUPPORTED_EXTENSIONS`` are read as text and
    passed to ``chunk_code(path, code)``. Every other file is skipped.

    Ingestion is best-effort: a file that cannot be read or chunked is logged
    and skipped instead of aborting the whole walk.

    Args:
        repo_path: Root directory to scan recursively. A path that does not
            exist simply yields an empty result (``os.walk`` visits nothing).

    Returns:
        A list of the items produced by ``parse_notebook`` / ``chunk_code``
        for every ingested file, in ``os.walk`` order.
    """
    log = logging.getLogger(__name__)
    documents = []
    for root, _, files in os.walk(repo_path):
        for file in files:
            path = os.path.join(root, file)

            if file.endswith(".ipynb"):
                documents.extend(parse_notebook(path))
                continue
            if not file.endswith(SUPPORTED_EXTENSIONS):
                continue

            try:
                # Decode explicitly as UTF-8 so the result does not depend on
                # the host locale; undecodable bytes are still dropped.
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    code = f.read()
                documents.extend(chunk_code(path, code))
            except Exception:
                # Keep going, but leave a trace of what was skipped and why.
                log.warning(
                    "Skipping %s: could not read or chunk it", path, exc_info=True
                )
    return documents
def parse_notebook(file_path: str, *, min_chars: int = 100):
    """Extract and chunk the code cells of a Jupyter notebook.

    The notebook is read with ``nbformat`` (as version 4), the sources of all
    cells whose ``cell_type`` is ``"code"`` are joined with blank lines, and
    the combined text is passed to ``chunk_code(file_path, combined)``.

    Parsing is best-effort: any failure while reading or chunking is logged
    and results in whatever was collected so far (normally an empty list).

    Args:
        file_path: Path to the ``.ipynb`` file.
        min_chars: The combined code, stripped of surrounding whitespace,
            must be strictly longer than this many characters to be chunked.
            Defaults to 100.

    Returns:
        A list of the items produced by ``chunk_code``; empty when the
        notebook has too little code or could not be processed.
    """
    docs = []
    try:
        nb = nbformat.read(file_path, as_version=4)
        code_cells = [
            cell.source for cell in nb.cells if cell.cell_type == "code"
        ]
        combined = "\n\n".join(code_cells)
        # Ignore notebooks with only a trivial amount of code.
        if len(combined.strip()) > min_chars:
            docs.extend(chunk_code(file_path, combined))
    except Exception:
        # Do not fail the caller's ingestion run, but record the problem.
        logging.getLogger(__name__).warning(
            "Skipping notebook %s: could not parse or chunk it",
            file_path,
            exc_info=True,
        )
    return docs
|