"""Utility functions for downloading content from Git repositories."""

import base64

import requests

# First line of a Git LFS pointer file; the real payload lives elsewhere.
_LFS_POINTER_PREFIX = "version https://git-lfs.github.com/spec/v1"


def _fetch_text(url: str, timeout: int) -> str:
    """Fetch *url* and return the body as text, raising on HTTP errors."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text


def download_github_file_content(api_url: str, timeout: int = 30) -> str:
    """Download file content from GitHub (handles Git LFS files).

    The API response is inspected in this order:

    1. If it carries inline ``content`` (and does not report
       ``encoding == "none"``), the content is base64-decoded as UTF-8.
       If the decoded text is a Git LFS pointer, the real file is fetched
       from ``download_url`` instead.
    2. Otherwise, if a non-empty ``download_url`` is present, the file is
       fetched from there.
    3. Otherwise a ``ValueError`` is raised.

    Args:
        api_url: GitHub API URL for the file (e.g., contents API endpoint)
        timeout: Request timeout in seconds (default: 30), applied to each
            HTTP request made by this function

    Returns:
        File content as a string

    Raises:
        requests.HTTPError: If any HTTP request fails
        ValueError: If the file content cannot be decoded or no usable
            content/download_url is found
    """
    response = requests.get(api_url, timeout=timeout)
    response.raise_for_status()
    api_data = response.json()

    # A non-object payload (e.g. a JSON array for a directory listing) has
    # neither field; report it the same way as a response missing both.
    if not isinstance(api_data, dict):
        raise ValueError("No content or download_url found in API response")

    # Use .get so a present-but-null download_url is treated as missing
    # rather than being handed to requests.get().
    download_url = api_data.get("download_url")

    # The contents API can return content == "" with encoding == "none" when
    # the file is too large to inline; decoding that would silently yield an
    # empty string, so treat it as "no inline content".
    has_inline_content = (
        "content" in api_data and api_data.get("encoding") != "none"
    )

    if has_inline_content:
        try:
            file_content = base64.b64decode(api_data["content"]).decode("utf-8")
        except (ValueError, TypeError) as e:
            # binascii.Error and UnicodeDecodeError are ValueError subclasses;
            # TypeError covers a non-string content value (e.g. null).
            raise ValueError(f"Failed to decode file content: {e}") from e

        if not file_content.startswith(_LFS_POINTER_PREFIX):
            return file_content

        # LFS pointer: the actual file must come from download_url.
        if not download_url:
            raise ValueError("Git LFS file found but no download_url available")
        return _fetch_text(download_url, timeout)

    if download_url:
        # No usable inline content; fetch the file directly.
        return _fetch_text(download_url, timeout)

    raise ValueError("No content or download_url found in API response")