""" ๐Ÿš€ GitHub โ†’ Hugging Face Spaces Importer โ€” Production-Grade Features: - Import any public GitHub repo into an HF Space with one click - Auto-detect SDK (Gradio, Streamlit, Docker, Static) by scanning project structure - Smart validation of all inputs before any network calls - Streaming progress with step-by-step status updates - Robust cleanup of temp files even on failure - Token format validation and permission checking - Branch validation and fallback - Concurrency-limited to prevent abuse - Detailed file tree preview with size calculations """ from __future__ import annotations import logging import os import re import shutil import subprocess import tempfile import traceback from dataclasses import dataclass from enum import Enum from typing import Optional, Generator import gradio as gr from huggingface_hub import HfApi from huggingface_hub.utils import ( HfHubHTTPError, RepositoryNotFoundError, ) # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ # Configuration # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ CLONE_TIMEOUT_SECONDS = 180 MAX_REPO_SIZE_MB = 500 MAX_FILES_TO_UPLOAD = 5_000 CONCURRENCY_LIMIT = 2 UPLOAD_IGNORE_PATTERNS = [ "*.pyc", "__pycache__/", ".git/", ".gitmodules", ".env", ".env.*", "*.log", ".DS_Store", "Thumbs.db", "desktop.ini", "node_modules/", ".venv/", "venv/", "env/", ".tox/", ".nox/", ".mypy_cache/", ".pytest_cache/", "*.egg-info/", "dist/", "build/", ".idea/", ".vscode/", "*.swp", "*.swo", "*~", ] # 
# ──────────────────────────────────────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────────────────────────────────────

logger = logging.getLogger("github_importer")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
)


# ──────────────────────────────────────────────────────────────────────────────
# Data types
# ──────────────────────────────────────────────────────────────────────────────

class SDK(str, Enum):
    """Space SDK identifiers.

    The values are the strings used by the SDK dropdown and passed on as the
    Space SDK; ``AUTO`` is a UI-only sentinel meaning "detect from the repo".
    """

    GRADIO = "gradio"
    STREAMLIT = "streamlit"
    DOCKER = "docker"
    STATIC = "static"
    AUTO = "auto-detect"


@dataclass
class ImportResult:
    """Summary record of one import attempt."""

    # True when the import finished without error.
    success: bool
    # URL of the resulting Space.
    space_url: str = ""
    # SDK name that was used for the Space.
    sdk_used: str = ""
    # Number of files counted in the cloned repository.
    file_count: int = 0
    # Human-readable total size.
    total_size: str = ""
    # Error description when the import failed, otherwise None.
    error: Optional[str] = None


# ──────────────────────────────────────────────────────────────────────────────
# Input validators
# ──────────────────────────────────────────────────────────────────────────────

# Matches http(s)://github.com/<owner>/<repo>[/anything].
# The named groups "owner" and "repo" are required by the URL validator, which
# reads them via match.group("owner") / match.group("repo"); without the
# <name> part the pattern does not even compile.
GITHUB_URL_PATTERN = re.compile(
    r"^https?://github\.com/"
    r"(?P<owner>[a-zA-Z0-9\-_.]+)/"
    r"(?P<repo>[a-zA-Z0-9\-_.]+)"
    r"(/.*)?$"
)
# Matches "<namespace>/<space-name>" using only characters the Hub accepts here.
HF_SPACE_ID_PATTERN = re.compile(
    r"^[a-zA-Z0-9\-_.]+/[a-zA-Z0-9\-_.]+$"
)


def validate_github_url(url: str) -> tuple[str, str, str]:
    """Validate and normalize a GitHub repository URL.

    Accepts full ``http(s)://github.com/owner/repo[...]`` URLs, scheme-less
    ``github.com/owner/repo`` and the ``owner/repo`` shorthand. Any trailing
    web-UI path (``/tree/main``, ``/issues`` ...), query string, fragment or
    ``.git`` suffix is dropped.

    Returns:
        ``(clean_url, owner, repo_name)`` where ``clean_url`` is
        ``<scheme>://github.com/<owner>/<repo_name>``.

    Raises:
        gr.Error: if the input is empty, an SSH URL, or cannot be parsed.
    """
    if not url or not url.strip():
        raise gr.Error("🔗 Please enter a GitHub repository URL.")

    url = url.strip()

    if url.startswith("git@github.com:"):
        # Only strip a *trailing* ".git" so names like "my.github.io" survive.
        ssh_path = re.sub(r"\.git$", "", url.split(":", 1)[1])
        raise gr.Error(
            "🔗 SSH URLs are not supported. Please use the HTTPS URL instead.\n"
            f"Try: https://github.com/{ssh_path}"
        )

    if not url.startswith(("http://", "https://")):
        if "/" in url and " " not in url:
            # Handle "github.com/owner/repo" as well as "owner/repo"; blindly
            # prepending would yield ".../github.com/owner" as owner/repo.
            shorthand = re.sub(
                r"^(?:www\.)?github\.com/", "", url, flags=re.IGNORECASE
            )
            url = f"https://github.com/{shorthand}"
            gr.Info(f"Auto-prepended https://github.com/ → {url}")
        else:
            raise gr.Error(
                "🔗 Invalid URL format. Expected: https://github.com/owner/repo"
            )

    # Query strings / fragments are never part of a clone URL.
    url = re.split(r"[?#]", url, maxsplit=1)[0].rstrip("/")

    match = GITHUB_URL_PATTERN.match(url)
    if not match:
        raise gr.Error(
            "🔗 Could not parse GitHub URL. Expected format:\n"
            "`https://github.com/owner/repository`\n\n"
            f"Got: `{url}`"
        )

    owner = match.group("owner")
    repo_name = re.sub(r"\.git$", "", match.group("repo"))
    if not repo_name:
        raise gr.Error(
            "🔗 Could not parse GitHub URL. Expected format:\n"
            "`https://github.com/owner/repository`\n\n"
            f"Got: `{url}`"
        )

    if len(repo_name) > 100:
        raise gr.Error("🔗 Repository name seems unusually long. Please verify the URL.")

    # Rebuild the URL from the matched parts so that sub-paths never reach
    # `git clone`, and repositories literally named "tree"/"wiki"/... still work.
    scheme = url.split("://", 1)[0]
    clean_url = f"{scheme}://github.com/{owner}/{repo_name}"
    return clean_url, owner, repo_name


def validate_hf_token(token: str) -> str:
    """Check the token's format and verify it with ``HfApi.whoami()``.

    Returns:
        The token with surrounding whitespace removed.

    Raises:
        gr.Error: if the token is empty, malformed, rejected by the Hub, or
            no username can be read from the ``whoami`` response.
    """
    if not token or not token.strip():
        raise gr.Error(
            "🔑 Please enter your Hugging Face token.\n\n"
            "Get one at: https://huggingface.co/settings/tokens\n"
            "Make sure it has **write** permissions."
        )

    token = token.strip()

    if not token.startswith("hf_"):
        raise gr.Error(
            "🔑 Invalid token format. HF tokens start with `hf_`.\n\n"
            "Get a valid token at: https://huggingface.co/settings/tokens"
        )

    if len(token) < 10:
        raise gr.Error("🔑 Token is too short. Please paste the full token.")

    try:
        api = HfApi(token=token)
        user_info = api.whoami()
    except Exception as e:
        # Classify by message text: the exception type varies with the failure.
        error_msg = str(e).lower()
        if "401" in error_msg or "unauthorized" in error_msg or "invalid" in error_msg:
            raise gr.Error(
                "🔑 Token is invalid or expired. Please generate a new token at:\n"
                "https://huggingface.co/settings/tokens"
            ) from e
        raise gr.Error(f"🔑 Could not verify token: {type(e).__name__}: {e}") from e

    username = user_info.get("name") or user_info.get("user") or ""
    if not username:
        raise gr.Error("🔑 Could not determine your username from the token.")

    return token


def validate_space_id(space_id: str, repo_name: str, token: str) -> str:
    """Validate a user-supplied Space ID, or derive one from the repo name.

    When ``space_id`` is blank, the ID becomes ``<username>/<sanitized repo>``
    with the username taken from ``HfApi.whoami()``.

    Raises:
        gr.Error: on a malformed ID, or when the username lookup fails.
    """
    if space_id and space_id.strip():
        space_id = space_id.strip()
        if not HF_SPACE_ID_PATTERN.match(space_id):
            raise gr.Error(
                "📝 Invalid Space ID format. Expected: `username/space-name`\n\n"
                "- Only letters, numbers, hyphens, underscores, and dots are allowed\n"
                f"- Got: `{space_id}`"
            )
        return space_id

    try:
        api = HfApi(token=token)
        user_info = api.whoami()
        username = user_info.get("name") or user_info.get("user") or "user"
    except Exception as e:
        raise gr.Error(
            f"Could not determine your HF username for auto-naming: {e}"
        ) from e

    # Replace disallowed characters, then collapse/trim the resulting hyphens.
    safe_name = re.sub(r"[^a-zA-Z0-9\-_.]", "-", repo_name)
    safe_name = re.sub(r"-+", "-", safe_name).strip("-")
    if not safe_name:
        safe_name = "imported-repo"

    auto_id = f"{username}/{safe_name}"
    gr.Info(f"Auto-generated Space ID: **{auto_id}**")
    return auto_id


def validate_branch(branch: str) -> Optional[str]:
    """Validate a branch name.

    Returns:
        The stripped branch name, or ``None`` when the field is blank (meaning
        "use the repository's default branch").

    Raises:
        gr.Error: if the name is too long or contains suspicious characters.
    """
    if not branch or not branch.strip():
        return None

    branch = branch.strip()

    # A leading "-" is never a valid ref name and could be read as an option
    # by git, so reject it alongside the path-like forms.
    if (
        ".." in branch
        or branch.startswith(("/", "-"))
        or branch.endswith("/")
    ):
        raise gr.Error(f"🌿 Invalid branch name: `{branch}`")

    if len(branch) > 250:
        raise gr.Error("🌿 Branch name too long.")

    if re.search(r'[;&|`$(){}[\]<>!]', branch):
        raise gr.Error(f"🌿 Branch name contains invalid characters: `{branch}`")

    return branch


# ──────────────────────────────────────────────────────────────────────────────
# SDK detection
# ──────────────────────────────────────────────────────────────────────────────

def detect_sdk(project_dir: str) -> tuple[str, str]:
    """Guess the Space SDK from the project's files.

    Checks, in priority order: a root ``Dockerfile`` (any letter case),
    framework usage in ``.py`` sources (scored), framework names in dependency
    files, then a root ``index.html``. Falls back to Gradio.

    Returns:
        ``(sdk_name, reason)`` where ``reason`` is a human-readable explanation.
    """
    root_names_lower = {name.lower() for name in os.listdir(project_dir)}

    if "dockerfile" in root_names_lower:
        return SDK.DOCKER.value, "Found Dockerfile in project root"

    skipped_dirs = {
        "node_modules", "__pycache__", ".git", "venv", ".venv",
        "env", ".tox", ".nox", "dist", "build",
    }
    gradio_score = 0
    streamlit_score = 0
    scanned_files = 0

    for root, dirs, fnames in os.walk(project_dir):
        # Prune in place so os.walk does not descend into these directories.
        dirs[:] = [
            d for d in dirs
            if not d.startswith(".") and d not in skipped_dirs
        ]
        for fname in fnames:
            if not fname.endswith(".py"):
                continue
            scanned_files += 1
            fpath = os.path.join(root, fname)
            try:
                # Explicit UTF-8 keeps detection independent of the host
                # locale; only the first 50k characters are inspected.
                with open(fpath, "r", encoding="utf-8", errors="replace") as f:
                    content = f.read(50_000)
            except OSError as e:
                logger.debug("SDK scan error on %s: %s", fpath, e)
                continue

            if "import gradio" in content or "from gradio" in content:
                gradio_score += 2
            if "gr.Blocks" in content or "gr.Interface" in content:
                gradio_score += 3
            if ".launch(" in content:
                gradio_score += 1
            if "import streamlit" in content or "from streamlit" in content:
                streamlit_score += 2
            if "st.title" in content or "st.write" in content:
                streamlit_score += 3

    # Ties go to Gradio.
    if gradio_score > 0 and gradio_score >= streamlit_score:
        return (
            SDK.GRADIO.value,
            f"Detected Gradio imports (score: {gradio_score}, scanned {scanned_files} .py files)",
        )
    if streamlit_score > 0:
        return (
            SDK.STREAMLIT.value,
            f"Detected Streamlit imports (score: {streamlit_score}, scanned {scanned_files} .py files)",
        )

    for req_file in ["requirements.txt", "pyproject.toml", "setup.cfg", "setup.py"]:
        req_path = os.path.join(project_dir, req_file)
        if not os.path.exists(req_path):
            continue
        try:
            with open(req_path, "r", encoding="utf-8", errors="replace") as f:
                content = f.read().lower()
        except OSError as e:
            # Best effort: an unreadable dependency file just yields no hint.
            logger.debug("SDK scan error on %s: %s", req_path, e)
            continue
        if "gradio" in content:
            return SDK.GRADIO.value, f"Found 'gradio' in {req_file}"
        if "streamlit" in content:
            return SDK.STREAMLIT.value, f"Found 'streamlit' in {req_file}"

    if "index.html" in root_names_lower:
        return SDK.STATIC.value, "Found index.html in project root"

    return (
        SDK.GRADIO.value,
        f"No framework detected (scanned {scanned_files} .py files) — defaulting to Gradio",
    )


# ──────────────────────────────────────────────────────────────────────────────
# File tree builder
# ──────────────────────────────────────────────────────────────────────────────

def build_file_tree(project_dir: str, max_files: int = 80) -> tuple[str, int, int]:
    """Build a visual file tree and measure the project.

    Only the first ``max_files`` files are rendered, but *every* file is
    counted and sized: the caller enforces repository size and file-count
    limits from these totals, so they must not stop at the preview limit.
    Hidden directories and common dependency/cache directories are skipped.

    Returns:
        ``(tree_string, file_count, total_size_bytes)``.
    """
    skipped_dirs = {"node_modules", "__pycache__", ".git", "venv", ".venv", ".tox"}
    lines: list[str] = []
    file_count = 0
    total_size = 0
    truncated = False

    for root, dirs, files in os.walk(project_dir):
        dirs[:] = sorted(
            d for d in dirs
            if not d.startswith(".") and d not in skipped_dirs
        )

        # relpath is robust to a trailing separator on project_dir.
        rel_root = os.path.relpath(root, project_dir)
        level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1

        if level > 0 and not truncated:
            indent = "│ " * level
            lines.append(f"{indent}📁 {os.path.basename(root)}/")

        file_indent = "│ " * (level + 1)
        for fname in sorted(files):
            fpath = os.path.join(root, fname)
            try:
                fsize = os.path.getsize(fpath)
            except OSError:
                # e.g. a dangling symlink; still counts as a file.
                fsize = 0
            total_size += fsize
            file_count += 1

            if file_count > max_files:
                truncated = True  # keep counting, stop rendering
                continue
            lines.append(f"{file_indent}📄 {fname} ({format_size(fsize)})")

    if truncated:
        lines.append(f"\n... and more files (showing first {max_files})")

    tree = "\n".join(lines) if lines else "(empty repository)"
    return tree, file_count, total_size


def format_size(size_bytes: int) -> str:
    """Format a byte count as a short human-readable string (B/KB/MB/GB)."""
    if size_bytes < 1024:
        return f"{size_bytes} B"
    elif size_bytes < 1024 ** 2:
        return f"{size_bytes / 1024:.1f} KB"
    elif size_bytes < 1024 ** 3:
        return f"{size_bytes / (1024 ** 2):.1f} MB"
    else:
        return f"{size_bytes / (1024 ** 3):.2f} GB"


# ──────────────────────────────────────────────────────────────────────────────
# Status builder (accumulates steps for the streaming UI)
# ──────────────────────────────────────────────────────────────────────────────

class StatusBuilder:
    """Accumulates step statuses and renders them as markdown."""

    def __init__(self) -> None:
        # Each step is (emoji, label, detail); detail may be empty.
        self.steps: list[tuple[str, str, str]] = []

    def add(self, emoji: str, label: str, detail: str = "") -> None:
        """Append a new step."""
        self.steps.append((emoji, label, detail))

    def update_last(self, emoji: str, label: str, detail: str = "") -> None:
        """Replace the most recent step; a no-op when there are no steps."""
        if self.steps:
            self.steps[-1] = (emoji, label, detail)

    def render(self) -> str:
        """Return all steps as markdown paragraphs, one per step."""
        lines = []
        for emoji, label, detail in self.steps:
            line = f"{emoji} **{label}**"
            if detail:
                line += f" — {detail}"
            lines.append(line)
        return "\n\n".join(lines)


# ──────────────────────────────────────────────────────────────────────────────
# Core import logic (generator for streaming updates)
# ──────────────────────────────────────────────────────────────────────────────

def _http_status(error: Exception) -> Optional[int]:
    """Return the HTTP status code attached to a Hub error, if any."""
    return getattr(getattr(error, "response", None), "status_code", None)


def _explain_clone_failure(
    stderr: str,
    github_url: str,
    owner: str,
    repo_name: str,
    branch_name: Optional[str],
) -> str:
    """Turn git's stderr from a failed clone into a user-facing message."""
    lowered = stderr.lower()

    # git reports a missing branch as "Remote branch <x> not found in upstream
    # origin", which also contains "not found" — so test for it first.
    if branch_name and (
        "remote branch" in lowered
        or ("could not read" in lowered and "branch" in lowered)
    ):
        return (
            f"🌿 Branch `{branch_name}` not found in `{owner}/{repo_name}`.\n\n"
            "Leave the branch field empty to use the default branch, "
            "or check the branch name on GitHub."
        )

    # With prompts disabled, a missing or private repo surfaces as
    # "could not read Username ...: terminal prompts disabled".
    if (
        "not found" in lowered
        or "does not exist" in lowered
        or "terminal prompts disabled" in lowered
    ):
        return (
            f"🔗 Repository not found: `{github_url}`\n\n"
            "- Check the URL for typos\n"
            "- Make sure the repository is **public**\n"
            "- Private repos require GitHub authentication (not supported)"
        )

    if "authentication" in lowered or "permission" in lowered:
        return (
            f"🔒 Repository requires authentication: `{github_url}`\n\n"
            "This tool only supports **public** GitHub repositories."
        )

    if "ssl" in lowered or "certificate" in lowered:
        return (
            "🔐 SSL/TLS error connecting to GitHub. "
            "This is likely a temporary network issue. Please try again."
        )

    return (
        f"❌ Git clone failed:\n```\n{stderr[:500]}\n```\n\n"
        "Check the URL and try again."
    )


def import_github_to_hf(
    github_url: str,
    hf_space_id: str,
    sdk_choice: str,
    hf_token: str,
    private: bool,
    branch: str,
    progress=gr.Progress(),
) -> Generator[tuple[str, str], None, None]:
    """Clone a GitHub repo and push it to a Hugging Face Space.

    Runs as a generator so the UI can stream progress. Each yielded item is
    ``(status_markdown, files_markdown)``.

    Steps: validate inputs, shallow-clone the repository into a temp
    directory, determine the SDK, create the Space, upload the files. The temp
    directory is removed in all cases.

    Raises:
        gr.Error: for every failure, with a user-facing explanation.
    """
    status = StatusBuilder()
    tmpdir: Optional[str] = None

    try:
        # ── Step 0: Validate all inputs ──────────────────────────────────────
        progress(0.02, desc="Validating inputs...")
        status.add("🔄", "Validating inputs")
        yield status.render(), ""

        github_url, owner, repo_name = validate_github_url(github_url)
        hf_token = validate_hf_token(hf_token)
        hf_space_id = validate_space_id(hf_space_id, repo_name, hf_token)
        branch_name = validate_branch(branch)

        status.update_last(
            "✅", "Inputs validated", f"`{owner}/{repo_name}` → `{hf_space_id}`"
        )
        yield status.render(), ""

        # ── Step 1: Clone ────────────────────────────────────────────────────
        progress(0.10, desc="Cloning repository...")
        status.add(
            "🔄",
            "Step 1/4: Cloning repository",
            f"`{github_url}`" + (f" (branch: `{branch_name}`)" if branch_name else ""),
        )
        yield status.render(), ""

        tmpdir = tempfile.mkdtemp(prefix="ghimport_")
        logger.info("Cloning %s to %s", github_url, tmpdir)

        clone_cmd = ["git", "clone", "--depth=1", "--single-branch"]
        if branch_name:
            clone_cmd += ["-b", branch_name]
        clone_cmd += [github_url, tmpdir]

        try:
            result = subprocess.run(
                clone_cmd,
                capture_output=True,
                text=True,
                timeout=CLONE_TIMEOUT_SECONDS,
                # Never block waiting for credentials on a server.
                env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
            )
        except subprocess.TimeoutExpired as e:
            raise gr.Error(
                f"⏰ Git clone timed out after {CLONE_TIMEOUT_SECONDS} seconds.\n\n"
                "The repository may be too large or the server may be unreachable.\n"
                "Try cloning a specific branch with fewer files."
            ) from e
        except FileNotFoundError as e:
            raise gr.Error(
                "🔧 `git` is not installed on this server. "
                "This is a server configuration issue."
            ) from e
        except OSError as e:
            raise gr.Error(f"🔧 System error during clone: {e}") from e

        if result.returncode != 0:
            stderr = result.stderr.strip()
            logger.error("Git clone failed: %s", stderr)
            raise gr.Error(
                _explain_clone_failure(stderr, github_url, owner, repo_name, branch_name)
            )

        # The git metadata must not be uploaded to the Space.
        git_dir = os.path.join(tmpdir, ".git")
        if os.path.isdir(git_dir):
            shutil.rmtree(git_dir, ignore_errors=True)

        progress(0.30, desc="Analyzing repository...")
        file_tree, file_count, total_size = build_file_tree(tmpdir)
        total_size_mb = total_size / (1024 ** 2)

        if file_count == 0:
            raise gr.Error("❌ The cloned repository is empty (no files found).")

        if total_size_mb > MAX_REPO_SIZE_MB:
            raise gr.Error(
                f"📦 Repository too large: {total_size_mb:.0f} MB "
                f"(limit: {MAX_REPO_SIZE_MB} MB).\n\n"
                "Try a smaller repository or fork with reduced history."
            )

        if file_count > MAX_FILES_TO_UPLOAD:
            gr.Warning(
                f"📦 Repository has {file_count} files (limit: {MAX_FILES_TO_UPLOAD}). "
                "Some files may be excluded."
            )

        files_md = (
            f"### 📁 Repository Files ({file_count} files, {format_size(total_size)})\n"
            f"```\n{file_tree}\n```"
        )

        status.update_last(
            "✅",
            "Step 1/4: Repository cloned",
            f"{file_count} files, {format_size(total_size)}",
        )
        yield status.render(), files_md

        # ── Step 2: Detect SDK ───────────────────────────────────────────────
        progress(0.40, desc="Detecting SDK...")
        status.add("🔄", "Step 2/4: Detecting SDK")
        yield status.render(), files_md

        if sdk_choice == SDK.AUTO.value:
            detected_sdk, detection_reason = detect_sdk(tmpdir)
            sdk_to_use = detected_sdk
            sdk_msg = f"Auto-detected **{detected_sdk}** ({detection_reason})"
        else:
            sdk_to_use = sdk_choice
            sdk_msg = f"Using selected SDK: **{sdk_choice}**"

        status.update_last("✅", "Step 2/4: SDK determined", sdk_msg)
        yield status.render(), files_md

        # ── Step 3: Create Space ─────────────────────────────────────────────
        progress(0.50, desc="Creating HF Space...")
        status.add("🔄", "Step 3/4: Creating Space", f"`{hf_space_id}` ({sdk_to_use})")
        yield status.render(), files_md

        api = HfApi(token=hf_token)

        try:
            api.create_repo(
                repo_id=hf_space_id,
                repo_type="space",
                space_sdk=sdk_to_use,
                private=private,
                exist_ok=True,
            )
        except HfHubHTTPError as e:
            status_code = _http_status(e)
            if status_code == 403:
                raise gr.Error(
                    f"🔑 Permission denied creating `{hf_space_id}`.\n\n"
                    "Your token may not have write access, or you may not have "
                    "permission to create Spaces in that namespace."
                ) from e
            elif status_code == 409:
                # Conflict: the Space exists already, so carry on and upload.
                gr.Warning(f"Space `{hf_space_id}` already exists — will overwrite files.")
            else:
                raise gr.Error(f"❌ Failed to create Space: {e}") from e
        except Exception as e:
            logger.exception("Space creation error: %s", e)
            raise gr.Error(
                f"❌ Failed to create Space `{hf_space_id}`:\n"
                f"{type(e).__name__}: {e}"
            ) from e

        status.update_last(
            "✅",
            "Step 3/4: Space created",
            f"[{hf_space_id}](https://huggingface.co/spaces/{hf_space_id})",
        )
        yield status.render(), files_md

        # ── Step 4: Upload files ─────────────────────────────────────────────
        progress(0.60, desc="Uploading files...")
        status.add(
            "🔄", "Step 4/4: Uploading files", f"{file_count} files to `{hf_space_id}`"
        )
        yield status.render(), files_md

        try:
            api.upload_folder(
                folder_path=tmpdir,
                repo_id=hf_space_id,
                repo_type="space",
                commit_message=f"Import from {github_url}",
                ignore_patterns=UPLOAD_IGNORE_PATTERNS,
            )
        except HfHubHTTPError as e:
            if _http_status(e) == 413:
                raise gr.Error(
                    "📦 Upload rejected — files too large for the HF Hub.\n\n"
                    "Try a smaller repository or exclude large binary files."
                ) from e
            raise gr.Error(f"❌ Upload failed: {e}") from e
        except Exception as e:
            logger.exception("Upload error: %s", e)
            raise gr.Error(
                f"❌ Failed to upload files:\n{type(e).__name__}: {e}"
            ) from e

        progress(0.95, desc="Finalizing...")
        space_url = f"https://huggingface.co/spaces/{hf_space_id}"
        status.update_last("✅", "Step 4/4: Files uploaded")

        # ── Success ──────────────────────────────────────────────────────────
        progress(1.0, desc="Import complete!")

        visibility = "🔒 Private" if private else "🌐 Public"
        # Built line by line: markdown tables and headings need real newlines
        # and must start at column 0 to render.
        summary = "\n".join([
            "",
            "",
            "---",
            "",
            "## ✅ Import Complete!",
            "",
            "| Detail | Value |",
            "|--------|-------|",
            f"| **Source** | [{github_url}]({github_url}) |",
            f"| **Branch** | {branch_name or '(default)'} |",
            f"| **Space** | [{hf_space_id}]({space_url}) |",
            f"| **SDK** | {sdk_to_use} |",
            f"| **Files** | {file_count} |",
            f"| **Size** | {format_size(total_size)} |",
            f"| **Visibility** | {visibility} |",
            "",
            f"### 🔗 **[Open your Space →]({space_url})**",
            "",
            "> The Space may take a minute to build. "
            'Refresh the link above if it shows "Building".',
            "",
        ])
        final_status = status.render() + summary

        yield final_status, files_md
        gr.Info(f"✅ Import complete! Space: {space_url}")

    except gr.Error:
        # Already user-facing; pass through untouched.
        raise
    except MemoryError as e:
        logger.error("MemoryError during import")
        raise gr.Error(
            "💥 Out of memory! The repository is too large to process. "
            "Try a smaller repository."
        ) from e
    except KeyboardInterrupt as e:
        logger.warning("Import interrupted by user")
        raise gr.Error("🛑 Import was interrupted.") from e
    except Exception as e:
        logger.exception("Unexpected error: %s: %s", type(e).__name__, e)
        raise gr.Error(
            f"💥 An unexpected error occurred:\n"
            f"{type(e).__name__}: {e}\n\n"
            "If this persists, please report it as a bug."
        ) from e
    finally:
        # Runs on success, failure, and when the generator is closed early.
        if tmpdir and os.path.exists(tmpdir):
            try:
                shutil.rmtree(tmpdir, ignore_errors=True)
                logger.info("Cleaned up temp directory: %s", tmpdir)
            except Exception as e:
                logger.warning("Cleanup warning: %s", e)


# ──────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ──────────────────────────────────────────────────────────────────────────────

with gr.Blocks(
    title="🚀 GitHub → HF Spaces Importer",
) as demo:

    gr.Markdown(
        "# 🚀 GitHub → Hugging Face Spaces Importer\n"
        "\n"
        "Import any **public** GitHub repository directly into a Hugging Face Space.\n"
        "The tool clones the repo, auto-detects the framework, creates the Space, "
        "and uploads all files.\n"
    )

    with gr.Group():
        gr.Markdown("### 🔗 Source Repository")
        with gr.Row():
            with gr.Column(scale=3):
                github_url_input = gr.Textbox(
                    label="GitHub Repository URL",
                    placeholder="https://github.com/owner/repo",
                    info="Public repo URL. Also accepts owner/repo format.",
                    max_lines=1,
                )
            with gr.Column(scale=1):
                branch_input = gr.Textbox(
                    label="Branch (optional)",
                    placeholder="main",
                    info="Leave empty for the default branch",
                    max_lines=1,
                )

    with gr.Group():
        gr.Markdown("### 🤗 Destination Space")
        with gr.Row():
            with gr.Column(scale=3):
                space_id_input = gr.Textbox(
                    label="HF Space ID (optional)",
                    placeholder="your-username/space-name",
                    info="Leave empty to auto-generate from the repo name",
                    max_lines=1,
                )
            with gr.Column(scale=1):
                sdk_dropdown = gr.Dropdown(
                    choices=[
                        ("🔍 Auto-detect", "auto-detect"),
                        ("🟠 Gradio", "gradio"),
                        ("🔴 Streamlit", "streamlit"),
                        ("🐳 Docker", "docker"),
                        ("📄 Static HTML", "static"),
                    ],
                    value="auto-detect",
                    label="Space SDK",
                    info="Auto-detect scans imports, Dockerfile, and index.html",
                )

    with gr.Group():
        gr.Markdown("### 🔑 Authentication & Options")
        with gr.Row():
            with gr.Column(scale=3):
                token_input = gr.Textbox(
                    label="Hugging Face Token",
                    type="password",
                    placeholder="hf_...",
                    info="Needs **write** access · [Get a token →](https://huggingface.co/settings/tokens)",
                    max_lines=1,
                )
            with gr.Column(scale=1):
                private_checkbox = gr.Checkbox(
                    label="🔒 Private Space",
                    value=False,
                    info="Only you (and your org) can see it",
                )

    import_btn = gr.Button(
        "🚀 Import to Hugging Face",
        variant="primary",
        size="lg",
    )

    with gr.Row():
        with gr.Column(scale=2):
            status_output = gr.Markdown(
                value="*Enter a GitHub URL and click Import to get started.*",
                label="Import Status",
            )
        with gr.Column(scale=1):
            files_output = gr.Markdown(
                value="",
                label="Repository Files",
            )

    with gr.Accordion("ℹ️ Notes & Troubleshooting", open=False):
        excluded_preview = ", ".join(UPLOAD_IGNORE_PATTERNS[:10])
        gr.Markdown("\n".join([
            "### Supported Repositories",
            "- **Public** GitHub repositories only "
            "(private repos require GitHub auth, which is not supported)",
            f"- Maximum repository size: **{MAX_REPO_SIZE_MB} MB** after cloning",
            f"- Clone timeout: **{CLONE_TIMEOUT_SECONDS} seconds**",
            "",
            "### SDK Auto-Detection",
            "The auto-detector scans your project in this priority order:",
            "1. **Dockerfile** in root → Docker",
            "2. **Python imports** (`import gradio` / `import streamlit`) → matching framework",
            "3. **requirements.txt / pyproject.toml** → checks for framework dependencies",
            "4. **index.html** in root → Static",
            "5. **Default** → Gradio (if nothing detected)",
            "",
            "### Excluded Files",
            "These patterns are excluded during upload:",
            f"`{excluded_preview}`, ...",
            "",
            "### Common Issues",
            "| Problem | Solution |",
            "|---------|----------|",
            '| "Repository not found" | Check URL, ensure repo is public |',
            '| "Branch not found" | Leave branch empty for default, or verify branch name |',
            '| "Permission denied" | Ensure your HF token has write access |',
            '| "Clone timed out" | Repository may be very large; try a specific branch |',
            '| Space shows "Building" | Wait 1–2 minutes for the Space to build and deploy |',
            "",
        ]))

    import_btn.click(
        fn=import_github_to_hf,
        inputs=[
            github_url_input,
            space_id_input,
            sdk_dropdown,
            token_input,
            private_checkbox,
            branch_input,
        ],
        outputs=[status_output, files_output],
        concurrency_limit=CONCURRENCY_LIMIT,
        concurrency_id="github_import",
        trigger_mode="once",
    )

demo.queue(default_concurrency_limit=CONCURRENCY_LIMIT, max_size=10)

if __name__ == "__main__":
    demo.launch(
        show_error=True,
        theme=gr.themes.Soft(),
        css="""
        footer { display: none !important; }
        .info-box {
            background: #f0f7ff;
            border-radius: 8px;
            padding: 12px;
            margin: 8px 0;
        }
        """,
    )