Spaces:
Runtime error
Runtime error
reduce upload time
Browse files
- api/external_services.py +5 -3
- api/main.py +4 -2
api/external_services.py
CHANGED
|
@@ -51,21 +51,23 @@ class InitiazlizeGithubService:
|
|
| 51 |
|
| 52 |
return False
|
| 53 |
|
| 54 |
-
def load_repo_data(self, owner, repo):
|
| 55 |
if self.validate_owner_repo(owner, repo):
|
| 56 |
loader = GithubRepositoryReader(
|
| 57 |
self.github_client,
|
| 58 |
owner=self.owner,
|
| 59 |
repo=self.repo,
|
| 60 |
filter_file_extensions=(
|
| 61 |
-
[
|
| 62 |
GithubRepositoryReader.FilterType.INCLUDE,
|
| 63 |
),
|
| 64 |
verbose=False,
|
| 65 |
concurrent_requests=25,
|
| 66 |
)
|
| 67 |
|
| 68 |
-
print(
|
|
|
|
|
|
|
| 69 |
|
| 70 |
docs = loader.load_data(branch="main")
|
| 71 |
print("Documents uploaded:")
|
|
|
|
| 51 |
|
| 52 |
return False
|
| 53 |
|
| 54 |
+
def load_repo_data(self, owner, repo, file_type):
|
| 55 |
if self.validate_owner_repo(owner, repo):
|
| 56 |
loader = GithubRepositoryReader(
|
| 57 |
self.github_client,
|
| 58 |
owner=self.owner,
|
| 59 |
repo=self.repo,
|
| 60 |
filter_file_extensions=(
|
| 61 |
+
[file_type],
|
| 62 |
GithubRepositoryReader.FilterType.INCLUDE,
|
| 63 |
),
|
| 64 |
verbose=False,
|
| 65 |
concurrent_requests=25,
|
| 66 |
)
|
| 67 |
|
| 68 |
+
print(
|
| 69 |
+
f"Loading {self.repo} repository by {self.owner}, file type: {file_type}"
|
| 70 |
+
)
|
| 71 |
|
| 72 |
docs = loader.load_data(branch="main")
|
| 73 |
print("Documents uploaded:")
|
api/main.py
CHANGED
|
@@ -31,8 +31,10 @@ async def scrape_and_upload_to_activeloop(repo_request: GitHubRepoRequest):
|
|
| 31 |
print(f"repo from user: {repo_request.githubRepoUrl}")
|
| 32 |
|
| 33 |
owner, repo = github_service.parse_github_url(repo_request.githubRepoUrl)
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
|
| 37 |
return {"status": "success", "message": "Repo processed successfully"}
|
| 38 |
|
|
|
|
| 31 |
print(f"repo from user: {repo_request.githubRepoUrl}")
|
| 32 |
|
| 33 |
owner, repo = github_service.parse_github_url(repo_request.githubRepoUrl)
|
| 34 |
+
file_types = [".py", ".js", ".ts", ".md", "ipynb"]
|
| 35 |
+
for file_type in file_types:
|
| 36 |
+
docs = github_service.load_repo_data(owner, repo, file_type)
|
| 37 |
+
activeloop_service.upload_to_activeloop(docs)
|
| 38 |
|
| 39 |
return {"status": "success", "message": "Repo processed successfully"}
|
| 40 |
|