Refactor: production-grade error handling, progress bars, input validation, SDK scoring, Gradio 6 compat
02d4cf3 verified | """ | |
| π GitHub β Hugging Face Spaces Importer β Production-Grade | |
| Features: | |
| - Import any public GitHub repo into an HF Space with one click | |
| - Auto-detect SDK (Gradio, Streamlit, Docker, Static) by scanning project structure | |
| - Smart validation of all inputs before any network calls | |
| - Streaming progress with step-by-step status updates | |
| - Robust cleanup of temp files even on failure | |
| - Token format validation and permission checking | |
| - Branch validation and fallback | |
| - Concurrency-limited to prevent abuse | |
| - Detailed file tree preview with size calculations | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| import os | |
| import re | |
| import shutil | |
| import subprocess | |
| import tempfile | |
| import traceback | |
| from dataclasses import dataclass | |
| from enum import Enum | |
| from typing import Optional, Generator | |
| import gradio as gr | |
| from huggingface_hub import HfApi | |
| from huggingface_hub.utils import ( | |
| HfHubHTTPError, | |
| RepositoryNotFoundError, | |
| ) | |
# ──────────────────────────────────────────────────────────────────────────────
# Configuration
# ──────────────────────────────────────────────────────────────────────────────
# Hard timeout for the `git clone` subprocess so a huge/unreachable repo
# fails fast instead of hanging a worker.
CLONE_TIMEOUT_SECONDS = 180
# Reject repositories whose on-disk size (after a shallow clone) exceeds this.
MAX_REPO_SIZE_MB = 500
# Soft cap on file count — exceeding it only produces a warning in the UI.
MAX_FILES_TO_UPLOAD = 5_000
# Maximum simultaneous imports allowed through the Gradio queue.
CONCURRENCY_LIMIT = 2
# Glob patterns skipped by upload_folder: bytecode/caches, git metadata,
# secrets (.env*), logs, OS/editor junk, virtualenvs, and build artifacts.
UPLOAD_IGNORE_PATTERNS = [
    "*.pyc", "__pycache__/", ".git/", ".gitmodules",
    ".env", ".env.*", "*.log",
    ".DS_Store", "Thumbs.db", "desktop.ini",
    "node_modules/", ".venv/", "venv/", "env/",
    ".tox/", ".nox/", ".mypy_cache/", ".pytest_cache/",
    "*.egg-info/", "dist/", "build/",
    ".idea/", ".vscode/", "*.swp", "*.swo", "*~",
]
# ──────────────────────────────────────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────────────────────────────────────
logger = logging.getLogger("github_importer")
# NOTE(review): basicConfig at import time is fine for a standalone app, but
# would clobber host logging config if this module were imported as a library.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
)
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Data types | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class SDK(str, Enum):
    """Hugging Face Space SDK choices; values match the Hub's `space_sdk` strings.

    Subclassing ``str`` lets members compare equal to the raw string values
    coming from the UI dropdown (e.g. ``sdk_choice == SDK.AUTO.value``).
    """
    GRADIO = "gradio"
    STREAMLIT = "streamlit"
    DOCKER = "docker"
    STATIC = "static"
    AUTO = "auto-detect"  # sentinel meaning "detect from the cloned project"
@dataclass
class ImportResult:
    """Outcome summary of one import run.

    Fix: the class previously declared dataclass-style fields but was missing
    the ``@dataclass`` decorator (imported at the top of the file but unused),
    so ``success`` existed only as an annotation and instances had no
    generated ``__init__``/``__repr__``/attributes. ``success`` now defaults
    to ``False`` so the zero-argument constructor remains valid.
    """
    success: bool = False        # True once the Space was created and files uploaded
    space_url: str = ""          # https://huggingface.co/spaces/<id> on success
    sdk_used: str = ""           # SDK actually used (after auto-detection)
    file_count: int = 0          # number of files found in the cloned repo
    total_size: str = ""         # human-readable repo size, e.g. "3.2 MB"
    error: Optional[str] = None  # error description when success is False
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Input validators | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Matches canonical GitHub repository URLs, capturing the owner and repo parts.
GITHUB_URL_PATTERN = re.compile(
    r"^https?://github\.com/"
    r"(?P<owner>[a-zA-Z0-9\-_.]+)/"
    r"(?P<repo>[a-zA-Z0-9\-_.]+)"
    r"(/.*)?$"
)
# Matches "namespace/name" style Hugging Face Space identifiers.
HF_SPACE_ID_PATTERN = re.compile(
    r"^[a-zA-Z0-9\-_.]+/[a-zA-Z0-9\-_.]+$"
)
def validate_github_url(url: str) -> tuple[str, str, str]:
    """Validate and normalize a GitHub URL.

    Accepts full HTTP(S) URLs — including links into subpages such as
    /tree/..., /blob/... or /issues — as well as bare "owner/repo" shorthand.
    Returns a (clean_url, owner, repo_name) triple, or raises gr.Error with
    user-facing guidance.
    """
    cleaned = (url or "").strip()
    if not cleaned:
        raise gr.Error("π Please enter a GitHub repository URL.")
    # SSH remotes cannot be cloned anonymously; point the user at HTTPS.
    if cleaned.startswith("git@github.com:"):
        raise gr.Error(
            "π SSH URLs are not supported. Please use the HTTPS URL instead.\n"
            f"Try: https://github.com/{cleaned.split(':')[1].replace('.git', '')}"
        )
    if not cleaned.startswith(("http://", "https://")):
        # Allow "owner/repo" shorthand; anything else is unparseable.
        if "/" in cleaned and " " not in cleaned:
            cleaned = f"https://github.com/{cleaned}"
            gr.Info(f"Auto-prepended https://github.com/ β {cleaned}")
        else:
            raise gr.Error(
                "π Invalid URL format. Expected: https://github.com/owner/repo"
            )
    # Normalize: trailing slash, .git suffix, then subpage segments.
    cleaned = cleaned.rstrip("/")
    cleaned = re.sub(r"\.git$", "", cleaned)
    cleaned = re.sub(r"/(tree|blob|commits|pull|issues|releases|actions|wiki)(/.*)?$", "", cleaned)
    m = GITHUB_URL_PATTERN.match(cleaned)
    if m is None:
        raise gr.Error(
            "π Could not parse GitHub URL. Expected format:\n"
            "`https://github.com/owner/repository`\n\n"
            f"Got: `{cleaned}`"
        )
    owner, repo_name = m.group("owner"), m.group("repo")
    if len(repo_name) > 100:
        raise gr.Error("π Repository name seems unusually long. Please verify the URL.")
    return cleaned, owner, repo_name
def validate_hf_token(token: str) -> str:
    """Validate the HF token's format, then confirm it works via `whoami`.

    Returns the stripped token, or raises gr.Error with instructions for
    obtaining a valid write-capable token.
    """
    stripped = token.strip() if token else ""
    if not stripped:
        raise gr.Error(
            "π Please enter your Hugging Face token.\n\n"
            "Get one at: https://huggingface.co/settings/tokens\n"
            "Make sure it has **write** permissions."
        )
    # Cheap local format checks before hitting the network.
    if not stripped.startswith("hf_"):
        raise gr.Error(
            "π Invalid token format. HF tokens start with `hf_`.\n\n"
            "Get a valid token at: https://huggingface.co/settings/tokens"
        )
    if len(stripped) < 10:
        raise gr.Error("π Token is too short. Please paste the full token.")
    # Round-trip to the Hub to confirm the token is live and resolves a user.
    try:
        info = HfApi(token=stripped).whoami()
    except Exception as e:
        lowered = str(e).lower()
        if any(marker in lowered for marker in ("401", "unauthorized", "invalid")):
            raise gr.Error(
                "π Token is invalid or expired. Please generate a new token at:\n"
                "https://huggingface.co/settings/tokens"
            )
        raise gr.Error(f"π Could not verify token: {type(e).__name__}: {e}")
    if not (info.get("name") or info.get("user")):
        raise gr.Error("π Could not determine your username from the token.")
    return stripped
def validate_space_id(space_id: str, repo_name: str, token: str) -> str:
    """Return a validated Space ID, or derive one from the repo name.

    A user-supplied ID is checked against the `username/space-name` pattern.
    When the field is blank, the authenticated username (from the token) plus
    a sanitized version of the repo name is used instead.
    """
    candidate = space_id.strip() if space_id else ""
    if candidate:
        if HF_SPACE_ID_PATTERN.match(candidate) is None:
            raise gr.Error(
                "π Invalid Space ID format. Expected: `username/space-name`\n\n"
                "- Only letters, numbers, hyphens, underscores, and dots are allowed\n"
                f"- Got: `{candidate}`"
            )
        return candidate
    # No ID supplied: resolve the username from the token for auto-naming.
    try:
        whoami = HfApi(token=token).whoami()
        username = whoami.get("name") or whoami.get("user") or "user"
    except Exception as e:
        raise gr.Error(f"Could not determine your HF username for auto-naming: {e}")
    # Sanitize the repo name into a valid Space slug.
    slug = re.sub(r"[^a-zA-Z0-9\-_.]", "-", repo_name)
    slug = re.sub(r"-+", "-", slug).strip("-") or "imported-repo"
    auto_id = f"{username}/{slug}"
    gr.Info(f"Auto-generated Space ID: **{auto_id}**")
    return auto_id
def validate_branch(branch: str) -> Optional[str]:
    """Return the cleaned branch name, or None when the field was left blank.

    Rejects names containing path tricks (".."), leading/trailing slashes,
    shell metacharacters, or absurd length, before they ever reach the
    `git clone` command line.
    """
    name = branch.strip() if branch else ""
    if not name:
        return None
    if name.startswith("/") or name.endswith("/") or ".." in name:
        raise gr.Error(f"πΏ Invalid branch name: `{name}`")
    if len(name) > 250:
        raise gr.Error("πΏ Branch name too long.")
    if re.search(r'[;&|`$(){}[\]<>!]', name) is not None:
        raise gr.Error(f"πΏ Branch name contains invalid characters: `{name}`")
    return name
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # SDK detection | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def detect_sdk(project_dir: str) -> tuple[str, str]:
    """Auto-detect SDK by examining project files. Returns (sdk_name, reason).

    Priority: Dockerfile at the root, then Python import scanning (Gradio vs
    Streamlit scoring), then dependency manifests, then index.html, and
    finally a Gradio default.
    """
    root_entries = set(os.listdir(project_dir))
    root_entries_lower = {name.lower() for name in root_entries}
    # 1. A root Dockerfile always wins.
    if "Dockerfile" in root_entries or "dockerfile" in root_entries_lower:
        return SDK.DOCKER.value, "Found Dockerfile in project root"
    # 2. Score framework evidence across every .py file in the tree.
    gradio_score = 0
    streamlit_score = 0
    scanned_files = 0
    scan_errors = 0
    skip_dirs = {
        "node_modules", "__pycache__", ".git", "venv", ".venv",
        "env", ".tox", ".nox", "dist", "build",
    }
    for current, subdirs, filenames in os.walk(project_dir):
        # Prune hidden and dependency/build directories in place so os.walk
        # never descends into them.
        subdirs[:] = [d for d in subdirs if not d.startswith(".") and d not in skip_dirs]
        for filename in filenames:
            if not filename.endswith(".py"):
                continue
            scanned_files += 1
            path = os.path.join(current, filename)
            try:
                # Only the first 50 KB is read — enough to find imports cheaply.
                with open(path, "r", errors="replace") as handle:
                    snippet = handle.read(50_000)
            except PermissionError:
                scan_errors += 1
                continue
            except Exception as e:
                scan_errors += 1
                logger.debug(f"SDK scan error on {path}: {e}")
                continue
            if "import gradio" in snippet or "from gradio" in snippet:
                gradio_score += 2
            if "gr.Blocks" in snippet or "gr.Interface" in snippet:
                gradio_score += 3
            if ".launch(" in snippet:
                gradio_score += 1
            if "import streamlit" in snippet or "from streamlit" in snippet:
                streamlit_score += 2
            if "st.title" in snippet or "st.write" in snippet:
                streamlit_score += 3
    # Gradio wins ties, matching the default below.
    if gradio_score > 0 and gradio_score >= streamlit_score:
        return SDK.GRADIO.value, f"Detected Gradio imports (score: {gradio_score}, scanned {scanned_files} .py files)"
    if streamlit_score > 0:
        return SDK.STREAMLIT.value, f"Detected Streamlit imports (score: {streamlit_score}, scanned {scanned_files} .py files)"
    # 3. Fall back to dependency manifests at the project root.
    for manifest in ("requirements.txt", "pyproject.toml", "setup.cfg", "setup.py"):
        manifest_path = os.path.join(project_dir, manifest)
        if not os.path.exists(manifest_path):
            continue
        try:
            with open(manifest_path, "r", errors="replace") as handle:
                manifest_text = handle.read().lower()
        except Exception:
            continue
        if "gradio" in manifest_text:
            return SDK.GRADIO.value, f"Found 'gradio' in {manifest}"
        if "streamlit" in manifest_text:
            return SDK.STREAMLIT.value, f"Found 'streamlit' in {manifest}"
    # 4. Static HTML site.
    if "index.html" in root_entries_lower:
        return SDK.STATIC.value, "Found index.html in project root"
    # 5. Nothing matched: default to Gradio.
    return SDK.GRADIO.value, f"No framework detected (scanned {scanned_files} .py files) β defaulting to Gradio"
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # File tree builder | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def build_file_tree(project_dir: str, max_files: int = 80) -> tuple[str, int, int]:
    """Build a visual file tree. Returns (tree_string, file_count, total_size_bytes).

    Hidden and dependency directories are skipped entirely; listing stops
    after `max_files` entries (with a trailing note), and the returned count
    and size cover only the files that were listed.
    """
    skip_dirs = {"node_modules", "__pycache__", ".git", "venv", ".venv", ".tox"}
    lines = []
    file_count = 0
    total_size = 0
    truncated = False
    for current, subdirs, names in os.walk(project_dir):
        # Prune and sort in place so os.walk traverses deterministically.
        subdirs[:] = sorted(d for d in subdirs if not d.startswith(".") and d not in skip_dirs)
        depth = current.replace(project_dir, "").count(os.sep)
        prefix = "β " * depth
        if depth > 0:
            lines.append(f"{prefix}π {os.path.basename(current)}/")
        for name in sorted(names):
            if file_count >= max_files:
                truncated = True
                break
            path = os.path.join(current, name)
            try:
                size = os.path.getsize(path)
            except OSError:
                # Unreadable entry (broken symlink, race): count it as zero bytes.
                size = 0
            total_size += size
            file_count += 1
            lines.append(f"{'β ' * (depth + 1)}π {name} ({format_size(size)})")
        if truncated:
            break
    if truncated:
        lines.append(f"\n... and more files (showing first {max_files})")
    return ("\n".join(lines) if lines else "(empty repository)"), file_count, total_size
def format_size(size_bytes: int) -> str:
    """Render a byte count as a human-readable B/KB/MB/GB string."""
    kb, mb, gb = 1024, 1024 ** 2, 1024 ** 3
    if size_bytes < kb:
        return f"{size_bytes} B"
    if size_bytes < mb:
        return f"{size_bytes / kb:.1f} KB"
    if size_bytes < gb:
        return f"{size_bytes / mb:.1f} MB"
    return f"{size_bytes / gb:.2f} GB"
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Status builder (accumulates steps for the streaming UI) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class StatusBuilder:
    """Accumulates step statuses and renders as markdown.

    Each step is an (emoji, label, detail) triple; `render` joins them into
    one markdown string with blank lines between steps.
    """

    def __init__(self):
        # Ordered history of steps shown to the user.
        self.steps: list[tuple[str, str, str]] = []

    def add(self, emoji: str, label: str, detail: str = ""):
        """Append a new step line."""
        self.steps.append((emoji, label, detail))

    def update_last(self, emoji: str, label: str, detail: str = ""):
        """Replace the most recent step in place (no-op when there are none)."""
        if not self.steps:
            return
        self.steps[-1] = (emoji, label, detail)

    def render(self) -> str:
        """Render all steps as a blank-line-separated markdown block."""
        rendered = []
        for emoji, label, detail in self.steps:
            suffix = f" β {detail}" if detail else ""
            rendered.append(f"{emoji} **{label}**{suffix}")
        return "\n\n".join(rendered)
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Core import logic (generator for streaming updates) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def import_github_to_hf(
    github_url: str,
    hf_space_id: str,
    sdk_choice: str,
    hf_token: str,
    private: bool,
    branch: str,
    progress=gr.Progress(),  # Gradio injects/tracks this; default instance is the framework convention
) -> Generator[tuple[str, str], None, None]:
    """Clone a GitHub repo and push it to a Hugging Face Space.

    Generator used as a streaming Gradio handler: each ``yield`` emits a
    ``(status_markdown, file_tree_markdown)`` pair so the UI updates step by
    step. Steps: validate inputs -> shallow git clone -> size/count checks ->
    SDK detection -> create Space -> upload files.

    All user-facing failures are raised as ``gr.Error``; the temp clone
    directory is always removed in ``finally``, even on failure.
    """
    status = StatusBuilder()
    tmpdir: Optional[str] = None  # set once the clone dir exists; cleaned up in finally
    try:
        # -- Step 0: validate all inputs (only network call is the token whoami) --
        progress(0.02, desc="Validating inputs...")
        status.add("π", "Validating inputs")
        yield status.render(), ""
        github_url, owner, repo_name = validate_github_url(github_url)
        hf_token = validate_hf_token(hf_token)
        hf_space_id = validate_space_id(hf_space_id, repo_name, hf_token)
        branch_name = validate_branch(branch)
        status.update_last("β ", "Inputs validated",
                           f"`{owner}/{repo_name}` β `{hf_space_id}`")
        yield status.render(), ""
        # -- Step 1: shallow clone of the GitHub repo into a temp dir --
        progress(0.10, desc="Cloning repository...")
        status.add("π", "Step 1/4: Cloning repository",
                   f"`{github_url}`" + (f" (branch: `{branch_name}`)" if branch_name else ""))
        yield status.render(), ""
        tmpdir = tempfile.mkdtemp(prefix="ghimport_")
        logger.info(f"Cloning {github_url} to {tmpdir}")
        # --depth=1/--single-branch keep the transfer minimal; GIT_TERMINAL_PROMPT=0
        # stops git from blocking on a credentials prompt for private repos.
        clone_cmd = ["git", "clone", "--depth=1", "--single-branch"]
        if branch_name:
            clone_cmd += ["-b", branch_name]
        clone_cmd += [github_url, tmpdir]
        try:
            result = subprocess.run(
                clone_cmd,
                capture_output=True,
                text=True,
                timeout=CLONE_TIMEOUT_SECONDS,
                env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
            )
        except subprocess.TimeoutExpired:
            raise gr.Error(
                f"β° Git clone timed out after {CLONE_TIMEOUT_SECONDS} seconds.\n\n"
                "The repository may be too large or the server may be unreachable.\n"
                "Try cloning a specific branch with fewer files."
            )
        except FileNotFoundError:
            raise gr.Error(
                "π§ `git` is not installed on this server. "
                "This is a server configuration issue."
            )
        except OSError as e:
            raise gr.Error(f"π§ System error during clone: {e}")
        if result.returncode != 0:
            # Map common git stderr fragments to actionable, user-facing errors.
            stderr = result.stderr.strip()
            logger.error(f"Git clone failed: {stderr}")
            if "not found" in stderr.lower() or "does not exist" in stderr.lower():
                raise gr.Error(
                    f"π Repository not found: `{github_url}`\n\n"
                    "- Check the URL for typos\n"
                    "- Make sure the repository is **public**\n"
                    "- Private repos require GitHub authentication (not supported)"
                )
            elif "could not read" in stderr.lower() and "branch" in stderr.lower():
                raise gr.Error(
                    f"πΏ Branch `{branch_name}` not found in `{owner}/{repo_name}`.\n\n"
                    "Leave the branch field empty to use the default branch, "
                    "or check the branch name on GitHub."
                )
            elif "authentication" in stderr.lower() or "permission" in stderr.lower():
                raise gr.Error(
                    f"π Repository requires authentication: `{github_url}`\n\n"
                    "This tool only supports **public** GitHub repositories."
                )
            elif "ssl" in stderr.lower() or "certificate" in stderr.lower():
                raise gr.Error(
                    "π SSL/TLS error connecting to GitHub. "
                    "This is likely a temporary network issue. Please try again."
                )
            else:
                raise gr.Error(
                    f"β Git clone failed:\n```\n{stderr[:500]}\n```\n\n"
                    "Check the URL and try again."
                )
        # Drop git metadata so it is neither counted nor uploaded to the Space.
        git_dir = os.path.join(tmpdir, ".git")
        if os.path.isdir(git_dir):
            shutil.rmtree(git_dir, ignore_errors=True)
        progress(0.30, desc="Analyzing repository...")
        file_tree, file_count, total_size = build_file_tree(tmpdir)
        total_size_mb = total_size / (1024 ** 2)
        if file_count == 0:
            raise gr.Error("β The cloned repository is empty (no files found).")
        if total_size_mb > MAX_REPO_SIZE_MB:
            raise gr.Error(
                f"π¦ Repository too large: {total_size_mb:.0f} MB "
                f"(limit: {MAX_REPO_SIZE_MB} MB).\n\n"
                "Try a smaller repository or fork with reduced history."
            )
        if file_count > MAX_FILES_TO_UPLOAD:
            # Soft limit: warn but continue (ignore patterns may prune files).
            gr.Warning(
                f"π¦ Repository has {file_count} files (limit: {MAX_FILES_TO_UPLOAD}). "
                "Some files may be excluded."
            )
        files_md = (
            f"### π Repository Files ({file_count} files, {format_size(total_size)})\n"
            f"```\n{file_tree}\n```"
        )
        status.update_last("β ", "Step 1/4: Repository cloned",
                           f"{file_count} files, {format_size(total_size)}")
        yield status.render(), files_md
        # -- Step 2: determine the Space SDK --
        progress(0.40, desc="Detecting SDK...")
        status.add("π", "Step 2/4: Detecting SDK")
        yield status.render(), files_md
        if sdk_choice == SDK.AUTO.value:
            detected_sdk, detection_reason = detect_sdk(tmpdir)
            sdk_to_use = detected_sdk
            sdk_msg = f"Auto-detected **{detected_sdk}** ({detection_reason})"
        else:
            sdk_to_use = sdk_choice
            sdk_msg = f"Using selected SDK: **{sdk_choice}**"
        status.update_last("β ", "Step 2/4: SDK determined", sdk_msg)
        yield status.render(), files_md
        # -- Step 3: create (or reuse) the destination Space --
        progress(0.50, desc="Creating HF Space...")
        status.add("π", "Step 3/4: Creating Space", f"`{hf_space_id}` ({sdk_to_use})")
        yield status.render(), files_md
        api = HfApi(token=hf_token)
        try:
            # exist_ok=True: re-importing into an existing Space overwrites files.
            api.create_repo(
                repo_id=hf_space_id,
                repo_type="space",
                space_sdk=sdk_to_use,
                private=private,
                exist_ok=True,
            )
        except HfHubHTTPError as e:
            status_code = getattr(e.response, "status_code", None) if hasattr(e, "response") else None
            if status_code == 403:
                raise gr.Error(
                    f"π Permission denied creating `{hf_space_id}`.\n\n"
                    "Your token may not have write access, or you may not have "
                    "permission to create Spaces in that namespace."
                )
            elif status_code == 409:
                # Conflict: the Space already exists — proceed and overwrite on upload.
                gr.Warning(f"Space `{hf_space_id}` already exists β will overwrite files.")
            else:
                raise gr.Error(f"β Failed to create Space: {e}")
        except Exception as e:
            logger.error(f"Space creation error: {e}")
            traceback.print_exc()
            raise gr.Error(
                f"β Failed to create Space `{hf_space_id}`:\n"
                f"{type(e).__name__}: {e}"
            )
        status.update_last("β ", "Step 3/4: Space created",
                           f"[{hf_space_id}](https://huggingface.co/spaces/{hf_space_id})")
        yield status.render(), files_md
        # -- Step 4: upload the cloned working tree to the Space --
        progress(0.60, desc="Uploading files...")
        status.add("π", "Step 4/4: Uploading files",
                   f"{file_count} files to `{hf_space_id}`")
        yield status.render(), files_md
        try:
            api.upload_folder(
                folder_path=tmpdir,
                repo_id=hf_space_id,
                repo_type="space",
                commit_message=f"Import from {github_url}",
                ignore_patterns=UPLOAD_IGNORE_PATTERNS,
            )
        except HfHubHTTPError as e:
            status_code = getattr(e.response, "status_code", None) if hasattr(e, "response") else None
            if status_code == 413:
                raise gr.Error(
                    "π¦ Upload rejected β files too large for the HF Hub.\n\n"
                    "Try a smaller repository or exclude large binary files."
                )
            raise gr.Error(f"β Upload failed: {e}")
        except Exception as e:
            logger.error(f"Upload error: {e}")
            traceback.print_exc()
            raise gr.Error(
                f"β Failed to upload files:\n{type(e).__name__}: {e}"
            )
        progress(0.95, desc="Finalizing...")
        space_url = f"https://huggingface.co/spaces/{hf_space_id}"
        status.update_last("β ", "Step 4/4: Files uploaded")
        # -- Success: append the final markdown summary table to the status --
        progress(1.0, desc="Import complete!")
        final_status = status.render() + f"""
---
## β Import Complete!
| Detail | Value |
|--------|-------|
| **Source** | [{github_url}]({github_url}) |
| **Branch** | {branch_name or '(default)'} |
| **Space** | [{hf_space_id}]({space_url}) |
| **SDK** | {sdk_to_use} |
| **Files** | {file_count} |
| **Size** | {format_size(total_size)} |
| **Visibility** | {'π Private' if private else 'π Public'} |
### π **[Open your Space β]({space_url})**
> The Space may take a minute to build. Refresh the link above if it shows "Building".
"""
        yield final_status, files_md
        gr.Info(f"β Import complete! Space: {space_url}")
    except gr.Error:
        # Already user-facing — let Gradio display it as-is.
        raise
    except MemoryError:
        logger.error("MemoryError during import")
        raise gr.Error(
            "π₯ Out of memory! The repository is too large to process. "
            "Try a smaller repository."
        )
    except KeyboardInterrupt:
        logger.warning("Import interrupted by user")
        raise gr.Error("π Import was interrupted.")
    except Exception as e:
        # Last-resort catch-all so users never see a raw traceback.
        logger.error(f"Unexpected error: {type(e).__name__}: {e}")
        traceback.print_exc()
        raise gr.Error(
            f"π₯ An unexpected error occurred:\n"
            f"{type(e).__name__}: {e}\n\n"
            "If this persists, please report it as a bug."
        )
    finally:
        # Always remove the clone directory, on success or failure.
        if tmpdir and os.path.exists(tmpdir):
            try:
                shutil.rmtree(tmpdir, ignore_errors=True)
                logger.info(f"Cleaned up temp directory: {tmpdir}")
            except Exception as e:
                logger.warning(f"Cleanup warning: {e}")
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Gradio UI | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# ──────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ──────────────────────────────────────────────────────────────────────────────
with gr.Blocks(
    title="π GitHub β HF Spaces Importer",
    # Fix: `theme` and `css` are gr.Blocks() constructor options. They were
    # previously passed to demo.launch(), which has no such parameters and
    # raises TypeError at startup on current Gradio releases.
    theme=gr.themes.Soft(),
    css="""
    footer { display: none !important; }
    .info-box { background: #f0f7ff; border-radius: 8px; padding: 12px; margin: 8px 0; }
    """,
) as demo:
    gr.Markdown("""
# π GitHub β Hugging Face Spaces Importer
Import any **public** GitHub repository directly into a Hugging Face Space.
The tool clones the repo, auto-detects the framework, creates the Space, and uploads all files.
""")
    # -- Source repository inputs --
    with gr.Group():
        gr.Markdown("### π Source Repository")
        with gr.Row():
            with gr.Column(scale=3):
                github_url_input = gr.Textbox(
                    label="GitHub Repository URL",
                    placeholder="https://github.com/owner/repo",
                    info="Public repo URL. Also accepts owner/repo format.",
                    max_lines=1,
                )
            with gr.Column(scale=1):
                branch_input = gr.Textbox(
                    label="Branch (optional)",
                    placeholder="main",
                    info="Leave empty for the default branch",
                    max_lines=1,
                )
    # -- Destination Space inputs --
    with gr.Group():
        gr.Markdown("### π€ Destination Space")
        with gr.Row():
            with gr.Column(scale=3):
                space_id_input = gr.Textbox(
                    label="HF Space ID (optional)",
                    placeholder="your-username/space-name",
                    info="Leave empty to auto-generate from the repo name",
                    max_lines=1,
                )
            with gr.Column(scale=1):
                sdk_dropdown = gr.Dropdown(
                    choices=[
                        ("π Auto-detect", "auto-detect"),
                        ("π Gradio", "gradio"),
                        ("π΄ Streamlit", "streamlit"),
                        ("π³ Docker", "docker"),
                        ("π Static HTML", "static"),
                    ],
                    value="auto-detect",
                    label="Space SDK",
                    info="Auto-detect scans imports, Dockerfile, and index.html",
                )
    # -- Authentication and visibility --
    with gr.Group():
        gr.Markdown("### π Authentication & Options")
        with gr.Row():
            with gr.Column(scale=3):
                token_input = gr.Textbox(
                    label="Hugging Face Token",
                    type="password",
                    placeholder="hf_...",
                    info="Needs **write** access Β· [Get a token β](https://huggingface.co/settings/tokens)",
                    max_lines=1,
                )
            with gr.Column(scale=1):
                private_checkbox = gr.Checkbox(
                    label="π Private Space",
                    value=False,
                    info="Only you (and your org) can see it",
                )
    import_btn = gr.Button(
        "π Import to Hugging Face",
        variant="primary",
        size="lg",
    )
    # -- Streaming outputs: step status on the left, file tree on the right --
    with gr.Row():
        with gr.Column(scale=2):
            status_output = gr.Markdown(
                value="*Enter a GitHub URL and click Import to get started.*",
                label="Import Status",
            )
        with gr.Column(scale=1):
            files_output = gr.Markdown(
                value="",
                label="Repository Files",
            )
    with gr.Accordion("βΉοΈ Notes & Troubleshooting", open=False):
        gr.Markdown(f"""
### Supported Repositories
- **Public** GitHub repositories only (private repos require GitHub auth, which is not supported)
- Maximum repository size: **{MAX_REPO_SIZE_MB} MB** after cloning
- Clone timeout: **{CLONE_TIMEOUT_SECONDS} seconds**
### SDK Auto-Detection
The auto-detector scans your project in this priority order:
1. **Dockerfile** in root β Docker
2. **Python imports** (`import gradio` / `import streamlit`) β matching framework
3. **requirements.txt / pyproject.toml** β checks for framework dependencies
4. **index.html** in root β Static
5. **Default** β Gradio (if nothing detected)
### Excluded Files
These patterns are excluded during upload:
`{', '.join(UPLOAD_IGNORE_PATTERNS[:10])}`, ...
### Common Issues
| Problem | Solution |
|---------|----------|
| "Repository not found" | Check URL, ensure repo is public |
| "Branch not found" | Leave branch empty for default, or verify branch name |
| "Permission denied" | Ensure your HF token has write access |
| "Clone timed out" | Repository may be very large; try a specific branch |
| Space shows "Building" | Wait 1β2 minutes for the Space to build and deploy |
""")
    # Wire the import action. Concurrency is capped so clones cannot pile up;
    # trigger_mode="once" ignores repeat clicks while a run is in flight.
    import_btn.click(
        fn=import_github_to_hf,
        inputs=[
            github_url_input,
            space_id_input,
            sdk_dropdown,
            token_input,
            private_checkbox,
            branch_input,
        ],
        outputs=[status_output, files_output],
        concurrency_limit=CONCURRENCY_LIMIT,
        concurrency_id="github_import",
        trigger_mode="once",
    )
demo.queue(default_concurrency_limit=CONCURRENCY_LIMIT, max_size=10)
if __name__ == "__main__":
    demo.launch(
        show_error=True,
    )