# loader.py — robust public loader for private Streamlit Space import os, os.path, sys, subprocess, difflib from urllib.parse import urlparse from huggingface_hub import HfApi, snapshot_download from huggingface_hub.utils import HfHubHTTPError # ========= CONFIG (can override via Secrets/Env) ========= PORT = os.environ.get("PORT", "8501") # IMPORTANT: LoRA worker entry file: PRIVATE_ENTRY_REL = os.environ.get("PRIVATE_ENTRY_REL", "streamlit_app.py") # Set PRIVATE_SPACE_ID in Secrets to your private LoRA Space: EXPLICIT_REPO_ID = os.environ.get("PRIVATE_SPACE_ID", "").strip() # Optional: if you want to derive from URL, but not required: SPACE_URL = os.environ.get("PRIVATE_SPACE_URL", "").strip() # ======================================================== def fail(msg: str, code: int = 1): print(f"[loader] ERROR: {msg}", file=sys.stderr) sys.exit(code) def desired_slug_from_url(url: str) -> str | None: # Parse owner-slug from 'https://owner-slug.hf.space' host = urlparse(url).hostname or "" if not host.endswith(".hf.space"): return None sub = host[:-len(".hf.space")] # first '-' splits owner from slug i = sub.find("-") if i <= 0: return None return sub[i+1:] # just the slug part def list_all_visible_spaces(api: HfApi, user: str, orgs: list[str], token: str): visible = [] # user’s spaces try: visible += api.list_spaces(author=user, token=token) except Exception as e: print(f"[loader] warn: list_spaces user failed: {e}") # org spaces for org in orgs: try: visible += api.list_spaces(author=org, token=token) except Exception as e: print(f"[loader] warn: list_spaces org '{org}' failed: {e}") # deduplicate by id seen, uniq = set(), [] for sp in visible: if sp.id not in seen: uniq.append(sp) seen.add(sp.id) return uniq def choose_best_repo_id(api: HfApi, token: str) -> str: """ Strategy: 1) If EXPLICIT_REPO_ID is set and exists -> use it. 2) Else build a desired slug from PRIVATE_SPACE_URL (if given), then list spaces across user+orgs and fuzzy pick closest. 3) Otherwise prompt to set PRIVATE_SPACE_ID. """ # Whoami -> user + orgs try: who = api.whoami(token=token) except Exception as e: fail(f"Cannot call whoami with this token: {e}") user = who.get("name") orgs = [o.get("name") for o in who.get("orgs", []) if o.get("name")] print(f"[loader] Using HF identity: {user}; orgs={orgs}") # 1) explicit repo id if EXPLICIT_REPO_ID: try: api.repo_info(repo_id=EXPLICIT_REPO_ID, repo_type="space", token=token) print(f"[loader] Using explicit PRIVATE_SPACE_ID: {EXPLICIT_REPO_ID}") return EXPLICIT_REPO_ID except HfHubHTTPError as e: print(f"[loader] Explicit PRIVATE_SPACE_ID not accessible: {e}") # 2) discover by slug desired_slug = None if SPACE_URL: desired_slug = desired_slug_from_url(SPACE_URL) if desired_slug: print(f"[loader] Desired slug from URL: {desired_slug}") spaces = list_all_visible_spaces(api, user, orgs, token) if not spaces: fail("Token sees no Spaces. Ensure this token belongs to an owner/collaborator of the PRIVATE Space.") # Build candidate list & fuzzy score ids, slugs = [], [] for sp in spaces: ids.append(sp.id) # 'owner/slug' slugs.append(sp.id.split("/",1)[1]) if desired_slug: # Try exact match first for rid in ids: if rid.split("/",1)[1] == desired_slug: print(f"[loader] Exact match: {rid}") return rid # Fuzzy pick match = difflib.get_close_matches(desired_slug, slugs, n=1, cutoff=0.6) if match: chosen_slug = match[0] rid = next(r for r in ids if r.endswith("/"+chosen_slug)) print(f"[loader] Fuzzy match chose: {rid}") return rid # Fallback heuristic (not critical since we’ll set PRIVATE_SPACE_ID) keyword = "team-assignment-and-attendance-taking" match = difflib.get_close_matches(keyword, slugs, n=1, cutoff=0.4) if match: rid = next(r for r in ids if r.endswith("/"+match[0])) print(f"[loader] Heuristic match chose: {rid}") return rid print("[loader] Could not guess. Token can read these spaces:") for rid in ids: print(" -", rid) fail("Set PRIVATE_SPACE_ID in Secrets to the exact owner/space of your private app.") def main(): token = os.environ.get("HF_TOKEN") if not token: fail("HF_TOKEN is not set (Space → Settings → Secrets).") # Use /tmp for caches and download dir (always writable on Spaces) os.environ.setdefault("HF_HOME", "/tmp/hf") os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/hf/cache") os.environ.setdefault("HF_HUB_CACHE", "/tmp/hf/cache") for p in ("/tmp/hf/cache", "/tmp/private_cache"): os.makedirs(p, exist_ok=True) api = HfApi() # Choose the correct private repo id repo_id = choose_best_repo_id(api, token) # Download the private Space try: local_dir = snapshot_download( repo_id=repo_id, repo_type="space", token=token, local_dir="/tmp/private_cache", local_dir_use_symlinks=False, ) print(f"[loader] Downloaded to: {local_dir}") except HfHubHTTPError as e: fail(f"Failed to download '{repo_id}'. Ensure this token can READ that private Space.\nOriginal: {e}") # Find entry file entry_path = os.path.join(local_dir, PRIVATE_ENTRY_REL) if not os.path.exists(entry_path): fail(f"Entry file not found: {entry_path}. Set PRIVATE_ENTRY_REL (e.g., src/streamlit_app.py).") # Launch Streamlit os.chdir(os.path.dirname(entry_path)) cmd = [ "streamlit", "run", os.path.basename(entry_path), "--server.port", str(PORT), "--server.address", "0.0.0.0" ] print(f"[loader] Launching: {' '.join(cmd)}") subprocess.run(cmd, check=True) if __name__ == "__main__": main()