Spaces:
Running
Running
| """Utility functions for downloading content from Git repositories.""" | |
| import base64 | |
| import requests | |
def _download_text(url: str, timeout: int) -> str:
    """Fetch *url* and return the response body decoded as text."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text


def download_github_file_content(api_url: str, timeout: int = 30) -> str:
    """Download a file's text content through the GitHub contents API.

    The inline base64 ``content`` field is used when the API provides it.
    When the API omits the payload (large files are returned with an empty
    ``content`` and ``encoding == "none"``), or when the decoded payload is
    a Git LFS pointer, the file is fetched from ``download_url`` instead.

    Args:
        api_url: GitHub API URL for the file (e.g., contents API endpoint)
        timeout: Request timeout in seconds, applied to every HTTP request
            this function makes (default: 30)

    Returns:
        File content as a string

    Raises:
        requests.HTTPError: If any HTTP request returns an error status
        ValueError: If the inline content cannot be decoded, or if the
            response carries neither usable content nor a download_url
    """
    response = requests.get(api_url, timeout=timeout)
    response.raise_for_status()
    api_data = response.json()

    # A directory path yields a JSON array rather than a file object; there
    # is no single file to return in that case.
    if not isinstance(api_data, dict):
        raise ValueError("No content or download_url found in API response")

    # download_url may be present but null, so test its value, not the key.
    download_url = api_data.get("download_url")

    # Only trust the inline payload when it is really base64. Large files
    # come back with content == "" and encoding == "none"; decoding that
    # would silently produce an empty string instead of the file.
    has_inline_content = (
        api_data.get("content") is not None
        and api_data.get("encoding", "base64") == "base64"
    )

    if has_inline_content:
        try:
            file_content = base64.b64decode(api_data["content"]).decode("utf-8")
        except (TypeError, ValueError) as e:
            # binascii.Error and UnicodeDecodeError are both ValueError
            # subclasses; TypeError covers a non-string payload.
            raise ValueError(f"Failed to decode file content: {e}") from e

        # A Git LFS pointer is a small text stub, not the real file.
        if file_content.startswith("version https://git-lfs.github.com/spec/v1"):
            if not download_url:
                raise ValueError("Git LFS file found but no download_url available")
            # NOTE(review): assumes download_url resolves to the actual LFS
            # object rather than the pointer file -- confirm against GitHub.
            file_content = _download_text(download_url, timeout)
        return file_content

    if download_url:
        # No usable inline payload (e.g. file too large): fetch it directly.
        return _download_text(download_url, timeout)

    raise ValueError("No content or download_url found in API response")