File size: 2,165 Bytes
f2c74ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""Utility functions for downloading content from Git repositories."""

import base64

import requests


def _download_text(url: str, timeout: int) -> str:
    """Fetch *url* and return the response body as text, raising on HTTP errors."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text


def download_github_file_content(api_url: str, timeout: int = 30) -> str:
    """Download file content from GitHub (handles large and Git LFS files).

    The API response is expected to be a JSON object describing one file.
    Its base64 ``content`` field is decoded when the payload is inlined;
    otherwise (no ``content``, or an ``encoding`` other than base64, which
    is how GitHub marks files too large to inline) the file is fetched from
    ``download_url``. If the decoded content is a Git LFS pointer, the real
    file is fetched from ``download_url`` as well.

    Args:
        api_url: GitHub API URL for the file (e.g., contents API endpoint)
        timeout: Request timeout in seconds (default: 30), applied to each
            HTTP request made by this function

    Returns:
        File content as a string

    Raises:
        requests.HTTPError: If any HTTP request fails
        ValueError: If the file content cannot be decoded or no
            content/download_url is found
    """
    response = requests.get(api_url, timeout=timeout)
    response.raise_for_status()
    api_data = response.json()

    # A non-object payload (e.g. the list returned for a directory) carries
    # neither field; report it the same way as a file entry without them.
    if not isinstance(api_data, dict):
        raise ValueError("No content or download_url found in API response")

    encoded = api_data.get("content")
    download_url = api_data.get("download_url")

    # Files too large to inline still carry a "content" key, but it is an
    # empty string with encoding "none". Checking only for the key would
    # return "" for such files, so the encoding decides whether the payload
    # is really inlined. A missing encoding is treated as base64.
    encoding = api_data.get("encoding") or "base64"
    if encoded is None or encoding != "base64":
        if not download_url:
            raise ValueError("No content or download_url found in API response")
        return _download_text(download_url, timeout)

    try:
        file_content = base64.b64decode(encoded).decode("utf-8")
    except (TypeError, ValueError) as exc:
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses;
        # TypeError covers a non-string "content" value.
        raise ValueError(f"Failed to decode file content: {exc}") from exc

    # A Git LFS pointer is a small text stub; the actual file lives behind
    # download_url.
    if file_content.startswith("version https://git-lfs.github.com/spec/v1"):
        if not download_url:
            raise ValueError("Git LFS file found but no download_url available")
        return _download_text(download_url, timeout)

    return file_content