Spaces:
Sleeping
Sleeping
Update utils/github_fetcher.py
Browse files- utils/github_fetcher.py +49 -37
utils/github_fetcher.py
CHANGED
|
@@ -26,13 +26,13 @@ class GitHubRepoFetcher:
|
|
| 26 |
Returns:
|
| 27 |
List of dictionaries containing file/directory information
|
| 28 |
"""
|
|
|
|
|
|
|
| 29 |
path = path.replace('\\', '/')
|
| 30 |
-
# Remove 'blob/main/' or 'tree/main/' from path if present
|
| 31 |
path = path.replace('blob/main/', '').replace('tree/main/', '')
|
| 32 |
path = path.replace('blob/master/', '').replace('tree/master/', '')
|
| 33 |
|
| 34 |
url = f'{self.base_url}/repos/{owner}/{repo}/contents/{path}'
|
| 35 |
-
|
| 36 |
|
| 37 |
try:
|
| 38 |
response = requests.get(url, headers=self.headers)
|
|
@@ -51,17 +51,17 @@ class GitHubRepoFetcher:
|
|
| 51 |
def download_file(self, owner: str, repo: str, path: str, save_path: str = None) -> Optional[Union[str, bytes]]:
|
| 52 |
"""Download a specific file from the repository."""
|
| 53 |
try:
|
|
|
|
|
|
|
|
|
|
| 54 |
path = path.replace('blob/main/', '').replace('tree/main/', '')
|
| 55 |
path = path.replace('blob/master/', '').replace('tree/master/', '')
|
| 56 |
|
| 57 |
url = f'{self.base_url}/repos/{owner}/{repo}/contents/{path}'
|
| 58 |
-
print(f"Fetching file from: {url}")
|
| 59 |
|
| 60 |
response = requests.get(url, headers=self.headers)
|
| 61 |
-
|
| 62 |
-
print(f"GitHub API error: {response.status_code}")
|
| 63 |
-
print(f"Response: {response.text}")
|
| 64 |
-
return None
|
| 65 |
|
| 66 |
content = response.json()
|
| 67 |
if not content.get('content'):
|
|
@@ -101,35 +101,47 @@ class GitHubRepoFetcher:
|
|
| 101 |
Returns:
|
| 102 |
Boolean indicating success
|
| 103 |
"""
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
return False
|
| 111 |
-
|
| 112 |
-
print(f"\nProcessing directory: {path or 'root'}")
|
| 113 |
-
|
| 114 |
-
success = True
|
| 115 |
-
for item in contents:
|
| 116 |
-
item_path = item['path']
|
| 117 |
-
local_item_path = os.path.join(local_path, os.path.basename(item_path))
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
Returns:
|
| 27 |
List of dictionaries containing file/directory information
|
| 28 |
"""
|
| 29 |
+
# Normalize path
|
| 30 |
+
path = path.strip('/')
|
| 31 |
path = path.replace('\\', '/')
|
|
|
|
| 32 |
path = path.replace('blob/main/', '').replace('tree/main/', '')
|
| 33 |
path = path.replace('blob/master/', '').replace('tree/master/', '')
|
| 34 |
|
| 35 |
url = f'{self.base_url}/repos/{owner}/{repo}/contents/{path}'
|
|
|
|
| 36 |
|
| 37 |
try:
|
| 38 |
response = requests.get(url, headers=self.headers)
|
|
|
|
| 51 |
def download_file(self, owner: str, repo: str, path: str, save_path: str = None) -> Optional[Union[str, bytes]]:
|
| 52 |
"""Download a specific file from the repository."""
|
| 53 |
try:
|
| 54 |
+
# Normalize path
|
| 55 |
+
path = path.strip('/')
|
| 56 |
+
path = path.replace('\\', '/')
|
| 57 |
path = path.replace('blob/main/', '').replace('tree/main/', '')
|
| 58 |
path = path.replace('blob/master/', '').replace('tree/master/', '')
|
| 59 |
|
| 60 |
url = f'{self.base_url}/repos/{owner}/{repo}/contents/{path}'
|
| 61 |
+
print(f"Fetching file from: {url}")
|
| 62 |
|
| 63 |
response = requests.get(url, headers=self.headers)
|
| 64 |
+
response.raise_for_status()
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
content = response.json()
|
| 67 |
if not content.get('content'):
|
|
|
|
| 101 |
Returns:
|
| 102 |
Boolean indicating success
|
| 103 |
"""
|
| 104 |
+
try:
|
| 105 |
+
# Normalize path
|
| 106 |
+
path = path.strip('/')
|
| 107 |
+
path = path.replace('\\', '/')
|
| 108 |
+
path = path.replace('blob/main/', '').replace('tree/main/', '')
|
| 109 |
+
path = path.replace('blob/master/', '').replace('tree/master/', '')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
+
print(f"Fetching directory: {path} for {owner}/{repo}")
|
| 112 |
+
|
| 113 |
+
contents = self.fetch_contents(owner, repo, path)
|
| 114 |
+
if not contents:
|
| 115 |
+
print(f"Failed to fetch contents for path: {path}")
|
| 116 |
+
return False
|
| 117 |
+
|
| 118 |
+
print(f"Processing directory: {path or 'root'}")
|
| 119 |
+
os.makedirs(local_path, exist_ok=True)
|
| 120 |
+
|
| 121 |
+
success = True
|
| 122 |
+
for item in contents:
|
| 123 |
+
item_path = item['path']
|
| 124 |
+
# Use basename for local path to maintain correct directory structure
|
| 125 |
+
local_item_path = os.path.join(local_path, os.path.basename(item_path))
|
| 126 |
|
| 127 |
+
try:
|
| 128 |
+
if item['type'] == 'dir':
|
| 129 |
+
print(f"Found directory: {item_path}")
|
| 130 |
+
if not self.download_directory(owner, repo, item_path, local_item_path):
|
| 131 |
+
success = False
|
| 132 |
+
else:
|
| 133 |
+
print(f"Downloading file: {item_path}")
|
| 134 |
+
result = self.download_file(owner, repo, item_path, local_item_path)
|
| 135 |
+
if result is None and local_item_path not in self.downloaded_files:
|
| 136 |
+
success = False
|
| 137 |
+
else:
|
| 138 |
+
print(f"Successfully downloaded: {item_path}")
|
| 139 |
+
except Exception as e:
|
| 140 |
+
print(f"Error processing {item_path}: {str(e)}")
|
| 141 |
+
success = False
|
| 142 |
+
|
| 143 |
+
return success
|
| 144 |
+
|
| 145 |
+
except Exception as e:
|
| 146 |
+
print(f"Error in download_directory: {str(e)}")
|
| 147 |
+
return False
|