aman1762 committed on
Commit
58336be
·
verified ·
1 Parent(s): 297146b

Update ingest.py

Browse files
Files changed (1) hide show
  1. ingest.py +19 -12
ingest.py CHANGED
@@ -1,24 +1,31 @@
1
  import os
 
2
  from git import Repo
3
  from chunker import chunk_code
4
 
5
- SUPPORTED_EXT = (".py", ".js", ".java", ".cpp")
 
6
 
7
- def load_repo(repo_url, local_dir="repo"):
8
- if os.path.exists(local_dir):
9
- return local_dir
10
- Repo.clone_from(repo_url, local_dir)
11
- return local_dir
12
 
13
- def ingest_repo(repo_path):
 
 
 
14
  documents = []
15
 
16
  for root, _, files in os.walk(repo_path):
17
  for file in files:
18
- if file.endswith(SUPPORTED_EXT):
19
- path = os.path.join(root, file)
20
- with open(path, "r", errors="ignore") as f:
21
- code = f.read()
22
- documents.extend(chunk_code(path, code))
 
 
 
23
 
24
  return documents
 
1
  import os
2
+ import shutil
3
  from git import Repo
4
  from chunker import chunk_code
5
 
6
+ SUPPORTED_EXTENSIONS = (".py", ".js", ".java", ".cpp")
7
+ BASE_REPO_DIR = "/tmp/user_repo"
8
 
9
+ def load_repo(repo_url: str) -> str:
10
+ # Always start fresh
11
+ if os.path.exists(BASE_REPO_DIR):
12
+ shutil.rmtree(BASE_REPO_DIR)
 
13
 
14
+ Repo.clone_from(repo_url, BASE_REPO_DIR)
15
+ return BASE_REPO_DIR
16
+
17
+ def ingest_repo(repo_path: str):
18
  documents = []
19
 
20
  for root, _, files in os.walk(repo_path):
21
  for file in files:
22
+ if file.endswith(SUPPORTED_EXTENSIONS):
23
+ file_path = os.path.join(root, file)
24
+ try:
25
+ with open(file_path, "r", errors="ignore") as f:
26
+ code = f.read()
27
+ documents.extend(chunk_code(file_path, code))
28
+ except Exception:
29
+ pass
30
 
31
  return documents