# Hugging Face Hub page header (not part of the program):
#   cheekeong2025's picture
#   Create loader.py
#   ed2823a verified
# loader.py — robust public loader for a private Streamlit Space
import os, os.path, sys, subprocess, difflib
from urllib.parse import urlparse
from huggingface_hub import HfApi, snapshot_download
from huggingface_hub.utils import HfHubHTTPError
# ---------------------------------------------------------------
# Configuration. Every value below can be overridden through the
# Space's Secrets / environment variables.
# ---------------------------------------------------------------

# Port handed to Streamlit via --server.port (kept as a string).
PORT = os.getenv("PORT", "8501")

# Entry script of the private (LoRA worker) app, given relative to the
# root of the downloaded Space.
PRIVATE_ENTRY_REL = os.getenv("PRIVATE_ENTRY_REL", "streamlit_app.py")

# Exact 'owner/space' id of the private Space; set PRIVATE_SPACE_ID in
# Secrets. An empty string means "not configured".
EXPLICIT_REPO_ID = os.getenv("PRIVATE_SPACE_ID", "").strip()

# Optional 'https://owner-slug.hf.space' URL from which the Space slug
# can be derived when no explicit id is configured.
SPACE_URL = os.getenv("PRIVATE_SPACE_URL", "").strip()
# ---------------------------------------------------------------
def fail(msg: str, code: int = 1):
    """Report a fatal loader error on stderr and terminate the process.

    Args:
        msg: Human-readable description of what went wrong.
        code: Exit status to terminate with (defaults to 1).

    Raises:
        SystemExit: Always; this function never returns normally.
    """
    sys.stderr.write(f"[loader] ERROR: {msg}\n")
    raise SystemExit(code)
def desired_slug_from_url(url: str) -> str | None:
# Parse owner-slug from 'https://owner-slug.hf.space'
host = urlparse(url).hostname or ""
if not host.endswith(".hf.space"):
return None
sub = host[:-len(".hf.space")]
# first '-' splits owner from slug
i = sub.find("-")
if i <= 0:
return None
return sub[i+1:] # just the slug part
def list_all_visible_spaces(api: "HfApi", user: str, orgs: list[str], token: str):
    """Collect the Spaces owned by the user and by each of the given orgs.

    Listing is best-effort: a failure for one author is reported as a
    warning on stdout and the remaining authors are still queried.

    Args:
        api: Hub client exposing ``list_spaces(author=..., token=...)``.
        user: Account name of the token's owner; skipped when empty.
        orgs: Organisation names to query; empty entries are skipped.
        token: Access token forwarded to every ``list_spaces`` call.

    Returns:
        A list of the Space objects returned by ``api.list_spaces``,
        de-duplicated by their ``id`` attribute, in first-seen order
        (the user's Spaces first, then each org in the given order).
    """
    authors = [(user, "user")]
    authors += [(org, f"org '{org}'") for org in (orgs or [])]

    first_seen = {}  # space id -> space object; dicts keep insertion order
    for author, label in authors:
        if not author:
            # An empty/None author would remove the filter and enumerate
            # every Space on the Hub instead of one account's Spaces.
            continue
        try:
            for space in api.list_spaces(author=author, token=token):
                first_seen.setdefault(space.id, space)
        except Exception as e:
            # Deliberately broad: one unreadable account must not abort
            # discovery for the others.
            print(f"[loader] warn: list_spaces {label} failed: {e}")
    return list(first_seen.values())
def choose_best_repo_id(api: HfApi, token: str) -> str:
    """Resolve the repo id ('owner/slug') of the private Space to load.

    Strategy:
      1) If PRIVATE_SPACE_ID is set and readable with this token -> use it.
      2) Else, if PRIVATE_SPACE_URL yields a slug, list the Spaces of the
         token's user and orgs and pick an exact (case-insensitive) match,
         or failing that the closest fuzzy match.
      3) Else fall back to a fuzzy match against a built-in keyword.
      4) Otherwise print the Spaces the token can see and exit, asking
         for PRIVATE_SPACE_ID to be set.

    Args:
        api: Hub client used for ``whoami``, ``repo_info`` and listing.
        token: Access token forwarded to every Hub call.

    Returns:
        The chosen repo id.

    Raises:
        SystemExit: via ``fail`` when ``whoami`` fails, when the token
            sees no Spaces, or when no candidate could be selected.
    """
    # Identity of the token: user name plus the orgs it belongs to.
    try:
        who = api.whoami(token=token)
    except Exception as e:
        fail(f"Cannot call whoami with this token: {e}")
    user = who.get("name")
    orgs = [o.get("name") for o in (who.get("orgs") or []) if o.get("name")]
    print(f"[loader] Using HF identity: {user}; orgs={orgs}")

    # 1) Explicit repo id.
    if EXPLICIT_REPO_ID:
        try:
            api.repo_info(repo_id=EXPLICIT_REPO_ID, repo_type="space", token=token)
        except (HfHubHTTPError, ValueError) as e:
            # ValueError: huggingface_hub rejects malformed repo ids with
            # a ValueError subclass; treat that like "not accessible" and
            # continue with discovery instead of crashing.
            print(f"[loader] Explicit PRIVATE_SPACE_ID not accessible: {e}")
        else:
            print(f"[loader] Using explicit PRIVATE_SPACE_ID: {EXPLICIT_REPO_ID}")
            return EXPLICIT_REPO_ID

    # 2) Discover by slug.
    desired_slug = desired_slug_from_url(SPACE_URL) if SPACE_URL else None
    if desired_slug:
        print(f"[loader] Desired slug from URL: {desired_slug}")

    spaces = list_all_visible_spaces(api, user, orgs, token)
    if not spaces:
        fail("Token sees no Spaces. Ensure this token belongs to an owner/collaborator of the PRIVATE Space.")

    ids = [sp.id for sp in spaces]  # each is 'owner/slug'

    # Map lower-cased slug -> repo id. The slug derived from the URL comes
    # from a hostname, which urlparse lower-cases, so comparisons have to
    # be case-insensitive. The first id seen wins when slugs collide.
    id_by_slug = {}
    for rid in ids:
        id_by_slug.setdefault(rid.split("/", 1)[-1].lower(), rid)
    slugs = list(id_by_slug)

    if desired_slug:
        wanted = desired_slug.lower()

        # Exact match first.
        if wanted in id_by_slug:
            rid = id_by_slug[wanted]
            print(f"[loader] Exact match: {rid}")
            return rid

        # Then the single closest slug, if it is similar enough.
        match = difflib.get_close_matches(wanted, slugs, n=1, cutoff=0.6)
        if match:
            rid = id_by_slug[match[0]]
            print(f"[loader] Fuzzy match chose: {rid}")
            return rid

    # 3) Fallback heuristic (not critical when PRIVATE_SPACE_ID is set).
    # NOTE(review): with the loose 0.4 cutoff this can select an unrelated
    # Space; setting PRIVATE_SPACE_ID avoids relying on it.
    keyword = "team-assignment-and-attendance-taking"
    match = difflib.get_close_matches(keyword, slugs, n=1, cutoff=0.4)
    if match:
        rid = id_by_slug[match[0]]
        print(f"[loader] Heuristic match chose: {rid}")
        return rid

    # 4) Give up, but show what the token can see to help configuration.
    print("[loader] Could not guess. Token can read these spaces:")
    for rid in ids:
        print(" -", rid)
    fail("Set PRIVATE_SPACE_ID in Secrets to the exact owner/space of your private app.")
def main():
    """Download the private Space and run its Streamlit entry script.

    Steps:
      1) Read HF_TOKEN from the environment (required).
      2) Point the Hugging Face caches at /tmp unless already configured.
      3) Resolve the private Space's repo id and download a snapshot of
         it into /tmp/private_cache.
      4) Change into the entry script's directory and launch
         ``streamlit run`` on PORT, bound to 0.0.0.0.

    Raises:
        SystemExit: via ``fail`` when the token is missing, the download
            fails, the entry file is absent, the ``streamlit`` command
            cannot be started, or Streamlit exits with a non-zero status.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        fail("HF_TOKEN is not set (Space → Settings → Secrets).")

    # Use /tmp for caches and the download dir (always writable on Spaces).
    # These defaults are inherited by the Streamlit child process.
    os.environ.setdefault("HF_HOME", "/tmp/hf")
    os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/hf/cache")
    os.environ.setdefault("HF_HUB_CACHE", "/tmp/hf/cache")
    cache_dir = os.environ["HF_HUB_CACHE"]
    download_dir = "/tmp/private_cache"
    for p in ("/tmp/hf/cache", download_dir, cache_dir):
        os.makedirs(p, exist_ok=True)

    api = HfApi()

    # Choose the correct private repo id.
    repo_id = choose_best_repo_id(api, token)

    # Download the private Space.
    try:
        local_dir = snapshot_download(
            repo_id=repo_id,
            repo_type="space",
            token=token,
            # huggingface_hub resolves its default cache location when it
            # is imported, i.e. before the defaults above were set, so the
            # cache directory is passed explicitly for this process.
            cache_dir=cache_dir,
            local_dir=download_dir,
            local_dir_use_symlinks=False,
        )
        print(f"[loader] Downloaded to: {local_dir}")
    except HfHubHTTPError as e:
        fail(f"Failed to download '{repo_id}'. Ensure this token can READ that private Space.\nOriginal: {e}")

    # Find the entry file; a directory of that name does not count.
    entry_path = os.path.join(local_dir, PRIVATE_ENTRY_REL)
    if not os.path.isfile(entry_path):
        fail(f"Entry file not found: {entry_path}. Set PRIVATE_ENTRY_REL (e.g., src/streamlit_app.py).")

    # Launch Streamlit from the script's own directory so that relative
    # paths inside the private app resolve as they would in its own Space.
    os.chdir(os.path.dirname(entry_path))
    cmd = [
        "streamlit", "run", os.path.basename(entry_path),
        "--server.port", str(PORT), "--server.address", "0.0.0.0",
    ]
    print(f"[loader] Launching: {' '.join(cmd)}")
    try:
        completed = subprocess.run(cmd)
    except FileNotFoundError:
        fail("The 'streamlit' command was not found on PATH. Add streamlit to this Space's requirements.")
    if completed.returncode != 0:
        # Surface Streamlit's own exit status instead of a traceback.
        fail(f"Streamlit exited with status {completed.returncode}.", code=completed.returncode)
# Script entry point: run the loader only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    raise SystemExit(main())