jules / services /hf_to_github.py
GraziePrego's picture
Upload folder using huggingface_hub
34450be verified
import sys
import os
import subprocess
import uuid
import requests
import json
from git import Repo
from huggingface_hub import snapshot_download
def get_github_user(token):
    """Look up the login name of the GitHub account that owns *token*.

    Sends a GET to ``https://api.github.com/user`` with a 10 second timeout.

    Args:
        token: GitHub token sent in the ``Authorization`` header.

    Returns:
        The ``login`` field of the JSON reply, or ``None`` when the reply is
        not OK or any exception is raised while making or decoding the call.
    """
    try:
        auth_headers = {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json",
        }
        reply = requests.get(
            "https://api.github.com/user",
            headers=auth_headers,
            timeout=10,
        )
        if not reply.ok:
            return None
        return reply.json().get("login")
    except Exception as e:
        # Best effort: report the problem and let the caller handle None.
        print(f"Error fetching GitHub user: {e}")
        return None
def create_github_repo(token, name):
    """Make sure a repository called *name* exists for the token's account.

    The owner is resolved with ``get_github_user``. If
    ``GET /repos/{owner}/{name}`` answers 200 the repository is treated as
    already present; otherwise a private repository is requested through
    ``POST /user/repos``.

    Args:
        token: GitHub token used for every API call.
        name: Repository name (without the owner prefix).

    Returns:
        ``True`` if the repository already exists or the creation request
        returned an OK status; ``False`` if the owner could not be resolved,
        the creation request was rejected, or an exception was raised.
    """
    try:
        owner = get_github_user(token)
        if not owner:
            return False

        api_headers = {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json",
        }

        # Step 1: probe for an existing repository.
        lookup = requests.get(
            f"https://api.github.com/repos/{owner}/{name}",
            headers=api_headers,
            timeout=10,
        )
        if lookup.status_code == 200:
            print(f"Repository {owner}/{name} already exists.")
            return True

        # Step 2: nothing there yet, so ask GitHub to create it.
        print(f"Creating repository {owner}/{name}...")
        payload = {
            "name": name,
            "private": True,
            "description": "Imported from Hugging Face Space",
        }
        creation = requests.post(
            "https://api.github.com/user/repos",
            headers=api_headers,
            json=payload,
            timeout=10,
        )
        return creation.ok
    except Exception as e:
        print(f"Error creating GitHub repo: {e}")
        return False
def parse_hf_repo_id(hf_input):
    """Split a Hugging Face Space reference into ``(profile, space)``.

    Accepted forms:
      * a URL containing ``huggingface.co/spaces/<profile>/<space>``
        (extra path segments, a query string or a fragment are ignored);
      * the short form ``<profile>/<space>``.

    Surrounding whitespace is ignored.

    Args:
        hf_input: The user-supplied Space URL or ID (may be ``None``).

    Returns:
        A ``(profile, space)`` tuple of non-empty strings, or
        ``(None, None)`` when the input is empty or either part is missing.
    """
    if not hf_input:
        return None, None

    text = hf_input.strip()
    marker = "huggingface.co/spaces/"
    if marker in text:
        text = text.split(marker, 1)[1]
    elif "/" not in text:
        return None, None

    # Drop "?query" / "#fragment" so they don't end up in the space name.
    text = text.split("?", 1)[0].split("#", 1)[0]

    segments = text.split("/")
    # A URL such as ".../spaces/user" has no space segment; report it as
    # invalid instead of raising IndexError.
    if len(segments) < 2 or not segments[0] or not segments[1]:
        return None, None
    return segments[0], segments[1]
def parse_github_repo_id(git_input):
    """Split a GitHub repository reference into ``(owner, repo)``.

    Accepted forms:
      * an HTTPS URL containing ``github.com/<owner>/<repo>[.git]``;
      * an SSH remote containing ``github.com:<owner>/<repo>[.git]``;
      * the short form ``<owner>/<repo>``;
      * a bare repository name (owner is then ``None``).

    Only a *trailing* ``.git`` is removed, so names such as
    ``user.github.io`` are preserved. Surrounding whitespace, query strings
    and fragments are ignored.

    Args:
        git_input: The user-supplied repository URL or ID (may be ``None``).

    Returns:
        An ``(owner, repo)`` tuple; either element is ``None`` when that part
        is absent from the input.
    """
    if not git_input:
        return None, None

    text = git_input.strip()
    from_url = False
    for marker in ("github.com/", "github.com:"):
        if marker in text:
            text = text.split(marker, 1)[1]
            from_url = True
            break

    text = text.split("?", 1)[0].split("#", 1)[0]
    segments = text.split("/")

    if len(segments) >= 2:
        owner, repo = segments[0], segments[1]
    elif from_url:
        # "github.com/<owner>" with no repository segment.
        owner, repo = segments[0], ""
    else:
        # A bare name is a repository without an explicit owner.
        owner, repo = "", segments[0]

    suffix = ".git"
    if repo.endswith(suffix):
        repo = repo[: -len(suffix)]

    return owner or None, repo or None
def get_hf_token():
    """Return the value of the ``HF_TOKEN`` environment variable, or None if unset."""
    return os.getenv("HF_TOKEN")
def get_github_token():
    """Return the value of the ``GITHUB_TOKEN`` environment variable, or None if unset."""
    return os.getenv("GITHUB_TOKEN")
def handle_upload(hf_space_input, git_repo_input, branch):
    """Download a Hugging Face Space and force-push it to a GitHub repository.

    Steps: validate the inputs and tokens, make sure the GitHub repository
    exists (best effort), download the Space snapshot into a scratch
    directory, turn that directory into a git repository, commit its
    contents and force-push the requested branch to GitHub.

    Args:
        hf_space_input: Space URL or ``<profile>/<space>`` ID.
        git_repo_input: GitHub URL, ``<owner>/<repo>``, bare repository name,
            or an empty value. A missing repository name falls back to the
            Space name; a missing owner falls back to the token's account.
        branch: Target branch name; a falsy value means ``"main"``.

    Returns:
        On success, a dict with ``status``, ``message`` and ``repo_id`` keys.
        On failure, a dict with a single ``error`` key. Token values are
        masked in the error text.
    """
    # Local import: only this function needs it (scratch-dir cleanup).
    import shutil

    hf_profile, hf_space = parse_hf_repo_id(hf_space_input)
    if not hf_profile or not hf_space:
        return {"error": f"Invalid HF Space ID: '{hf_space_input}'"}
    hf_space_full = f"{hf_profile}/{hf_space}"

    git_token = get_github_token()
    if not git_token:
        return {"error": "GITHUB_TOKEN not found in environment secrets."}

    git_profile, git_repo = parse_github_repo_id(git_repo_input)
    # If no git_repo provided, use hf_space name
    if not git_repo:
        git_repo = hf_space
    # If no profile provided, fetch it from token
    if not git_profile:
        git_profile = get_github_user(git_token)
        if not git_profile:
            return {"error": "Could not determine GitHub user profile from token."}

    if not branch:
        branch = "main"

    hf_token = get_hf_token()
    if not hf_token:
        return {"error": "HF_TOKEN not found in environment secrets."}

    # Ensure the repository exists. The result is deliberately not treated as
    # fatal: the push below reports the real error if the target is missing.
    # NOTE(review): create_github_repo works on the token owner's account,
    # which may differ from git_profile when an explicit owner was given.
    create_github_repo(git_token, git_repo)

    upload_dir = os.path.join("/tmp/jules_uploads", f"upload_{uuid.uuid4().hex}")
    repo = None
    try:
        # makedirs also creates the shared parent directory when needed.
        os.makedirs(upload_dir, exist_ok=True)

        # 1. Download HF Space
        print(f"Downloading {hf_space_full}...")
        snapshot_download(
            repo_id=hf_space_full,
            repo_type="space",
            local_dir=upload_dir,
            token=hf_token,
        )

        # 2. Target URL (the token is embedded for HTTPS authentication)
        target_url = f"https://{git_token}@github.com/{git_profile}/{git_repo}.git"

        # 3. Push to GitHub
        repo = Repo.init(upload_dir)

        # Create the branch, or switch to it when creation is refused.
        try:
            repo.git.checkout("-b", branch)
        except Exception:
            repo.git.checkout(branch)

        repo.git.add(A=True)
        if repo.git.status("--short"):
            repo.index.commit(f"Import from HF Space {hf_space_full}")

        if "origin" in [remote.name for remote in repo.remotes]:
            repo.remote("origin").set_url(target_url)
        else:
            repo.create_remote("origin", target_url)

        print(f"Pushing to GitHub {git_profile}/{git_repo}...")
        repo.git.push("origin", branch, force=True)

        return {
            "status": "success",
            "message": f"{hf_space_full} uploaded to {git_profile}/{git_repo} ({branch})",
            "repo_id": f"{git_profile}/{git_repo}",
        }
    except Exception as e:
        # Git error output can echo the remote URL, which carries the token;
        # mask secrets before handing the message back to the caller.
        message = str(e)
        for secret in (git_token, hf_token):
            message = message.replace(secret, "***")
        return {"error": message}
    finally:
        # Release git handles, then remove the scratch copy (its .git/config
        # holds the token-bearing remote URL) so nothing accumulates in /tmp.
        if repo is not None:
            repo.close()
        shutil.rmtree(upload_dir, ignore_errors=True)
if __name__ == "__main__":
    # CLI entry point: <hf_space> <git_repo> <branch>.
    # The result (or the usage error) is written to stdout as a JSON object.
    if len(sys.argv) < 4:
        print(json.dumps({"error": "Missing arguments"}))
        sys.exit(1)

    hf_space, git_repo, branch = sys.argv[1:4]
    result = handle_upload(hf_space, git_repo, branch)
    print(json.dumps(result))