nehajiya8 committed on
Commit
0efb19f
·
verified ·
1 Parent(s): 69e2804

Update utils/github_fetcher.py

Browse files
Files changed (1) hide show
  1. utils/github_fetcher.py +49 -37
utils/github_fetcher.py CHANGED
@@ -26,13 +26,13 @@ class GitHubRepoFetcher:
26
  Returns:
27
  List of dictionaries containing file/directory information
28
  """
 
 
29
  path = path.replace('\\', '/')
30
- # Remove 'blob/main/' or 'tree/main/' from path if present
31
  path = path.replace('blob/main/', '').replace('tree/main/', '')
32
  path = path.replace('blob/master/', '').replace('tree/master/', '')
33
 
34
  url = f'{self.base_url}/repos/{owner}/{repo}/contents/{path}'
35
-
36
 
37
  try:
38
  response = requests.get(url, headers=self.headers)
@@ -51,17 +51,17 @@ class GitHubRepoFetcher:
51
  def download_file(self, owner: str, repo: str, path: str, save_path: str = None) -> Optional[Union[str, bytes]]:
52
  """Download a specific file from the repository."""
53
  try:
 
 
 
54
  path = path.replace('blob/main/', '').replace('tree/main/', '')
55
  path = path.replace('blob/master/', '').replace('tree/master/', '')
56
 
57
  url = f'{self.base_url}/repos/{owner}/{repo}/contents/{path}'
58
- print(f"Fetching file from: {url}") # Debug log
59
 
60
  response = requests.get(url, headers=self.headers)
61
- if response.status_code != 200:
62
- print(f"GitHub API error: {response.status_code}")
63
- print(f"Response: {response.text}")
64
- return None
65
 
66
  content = response.json()
67
  if not content.get('content'):
@@ -101,35 +101,47 @@ class GitHubRepoFetcher:
101
  Returns:
102
  Boolean indicating success
103
  """
104
- # Remove 'tree/main/' or 'tree/master/' from path if present
105
- path = path.replace('tree/main/', '').replace('tree/master/', '')
106
-
107
- contents = self.fetch_contents(owner, repo, path)
108
- if not contents:
109
- print(f"Failed to fetch contents for path: {path}")
110
- return False
111
-
112
- print(f"\nProcessing directory: {path or 'root'}")
113
-
114
- success = True
115
- for item in contents:
116
- item_path = item['path']
117
- local_item_path = os.path.join(local_path, os.path.basename(item_path))
118
 
119
- try:
120
- if item['type'] == 'dir':
121
- os.makedirs(local_item_path, exist_ok=True)
122
- if not self.download_directory(owner, repo, item_path, local_item_path):
123
- success = False
124
- else:
125
- print(f"Downloading file: {item_path}")
126
- result = self.download_file(owner, repo, item_path, local_item_path)
127
- if result is None and local_item_path not in self.downloaded_files:
128
- success = False
129
- else:
130
- print(f"Successfully downloaded: {item_path}")
131
- except Exception as e:
132
- print(f"Error processing {item_path}: {str(e)}")
133
- success = False
134
 
135
- return success
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  Returns:
27
  List of dictionaries containing file/directory information
28
  """
29
+ # Normalize path
30
+ path = path.strip('/')
31
  path = path.replace('\\', '/')
 
32
  path = path.replace('blob/main/', '').replace('tree/main/', '')
33
  path = path.replace('blob/master/', '').replace('tree/master/', '')
34
 
35
  url = f'{self.base_url}/repos/{owner}/{repo}/contents/{path}'
 
36
 
37
  try:
38
  response = requests.get(url, headers=self.headers)
 
51
  def download_file(self, owner: str, repo: str, path: str, save_path: str = None) -> Optional[Union[str, bytes]]:
52
  """Download a specific file from the repository."""
53
  try:
54
+ # Normalize path
55
+ path = path.strip('/')
56
+ path = path.replace('\\', '/')
57
  path = path.replace('blob/main/', '').replace('tree/main/', '')
58
  path = path.replace('blob/master/', '').replace('tree/master/', '')
59
 
60
  url = f'{self.base_url}/repos/{owner}/{repo}/contents/{path}'
61
+ print(f"Fetching file from: {url}")
62
 
63
  response = requests.get(url, headers=self.headers)
64
+ response.raise_for_status()
 
 
 
65
 
66
  content = response.json()
67
  if not content.get('content'):
 
101
  Returns:
102
  Boolean indicating success
103
  """
104
+ try:
105
+ # Normalize path
106
+ path = path.strip('/')
107
+ path = path.replace('\\', '/')
108
+ path = path.replace('blob/main/', '').replace('tree/main/', '')
109
+ path = path.replace('blob/master/', '').replace('tree/master/', '')
 
 
 
 
 
 
 
 
110
 
111
+ print(f"Fetching directory: {path} for {owner}/{repo}")
112
+
113
+ contents = self.fetch_contents(owner, repo, path)
114
+ if not contents:
115
+ print(f"Failed to fetch contents for path: {path}")
116
+ return False
117
+
118
+ print(f"Processing directory: {path or 'root'}")
119
+ os.makedirs(local_path, exist_ok=True)
120
+
121
+ success = True
122
+ for item in contents:
123
+ item_path = item['path']
124
+ # Use basename for local path to maintain correct directory structure
125
+ local_item_path = os.path.join(local_path, os.path.basename(item_path))
126
 
127
+ try:
128
+ if item['type'] == 'dir':
129
+ print(f"Found directory: {item_path}")
130
+ if not self.download_directory(owner, repo, item_path, local_item_path):
131
+ success = False
132
+ else:
133
+ print(f"Downloading file: {item_path}")
134
+ result = self.download_file(owner, repo, item_path, local_item_path)
135
+ if result is None and local_item_path not in self.downloaded_files:
136
+ success = False
137
+ else:
138
+ print(f"Successfully downloaded: {item_path}")
139
+ except Exception as e:
140
+ print(f"Error processing {item_path}: {str(e)}")
141
+ success = False
142
+
143
+ return success
144
+
145
+ except Exception as e:
146
+ print(f"Error in download_directory: {str(e)}")
147
+ return False