"""Utility functions for downloading content from Git repositories."""
import base64
import requests
def download_github_file_content(api_url: str, timeout: int = 30) -> str:
    """Download a file's text content via the GitHub contents API.

    The API response is inspected in this order:

    1. If it carries inline base64 ``content``, that content is decoded as
       UTF-8. When the decoded text is a Git LFS pointer, the real file is
       fetched from ``download_url`` instead.
    2. Otherwise (no inline content, or ``encoding`` is not ``"base64"``,
       which is how GitHub marks files too large to inline) the file is
       fetched from ``download_url``.

    Args:
        api_url: GitHub API URL for the file (e.g., contents API endpoint).
        timeout: Per-request timeout in seconds (default: 30). It applies to
            each HTTP request separately; up to two requests may be made.

    Returns:
        File content as a string.

    Raises:
        requests.HTTPError: If any HTTP request returns an error status.
        ValueError: If the inline content cannot be decoded, or the response
            offers neither usable content nor a download_url.
    """
    response = requests.get(api_url, timeout=timeout)
    response.raise_for_status()
    api_data = response.json()

    # A non-object payload (e.g. the JSON list returned for a directory URL)
    # has neither of the fields this function needs.
    if not isinstance(api_data, dict):
        raise ValueError("No content or download_url found in API response")

    download_url = api_data.get("download_url")
    encoded = api_data.get("content")
    # An empty "content" paired with encoding "none" means the payload was not
    # inlined; decoding it would silently yield an empty string. A missing
    # "encoding" key is treated as base64 to keep the previous behaviour.
    has_inline_content = (
        encoded is not None and api_data.get("encoding", "base64") == "base64"
    )

    if has_inline_content:
        try:
            file_content = base64.b64decode(encoded).decode("utf-8")
        except (ValueError, TypeError) as e:
            # binascii.Error and UnicodeDecodeError are both ValueError
            # subclasses; TypeError covers a non-string "content" value.
            raise ValueError(f"Failed to decode file content: {e}") from e

        # Anything other than a Git LFS pointer is the real file content.
        if not file_content.startswith("version https://git-lfs.github.com/spec/v1"):
            return file_content

        # For LFS pointers, download_url points at the actual file.
        if not download_url:
            raise ValueError("Git LFS file found but no download_url available")
    elif not download_url:
        # Also covers a "download_url" key that is present but null.
        raise ValueError("No content or download_url found in API response")

    download_response = requests.get(download_url, timeout=timeout)
    download_response.raise_for_status()
    return download_response.text
|