Spaces:
Sleeping
Sleeping
dasdebanna
committed on
Commit
·
f978fb1
1
Parent(s):
118bb5c
Launcher: ensure repo-local HF/transformers cache directories are created and writable
Browse files - streamlit_app.py +37 -1
streamlit_app.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
Launcher for Hugging Face Spaces / Streamlit:
|
| 4 |
- force XDG_CONFIG_HOME/HOME to a writable folder in the repo
|
| 5 |
- create a minimal .streamlit/config.toml if missing
|
|
|
|
| 6 |
- add repo/src to sys.path so `from data_loader import ...` works
|
| 7 |
- run the actual app (src/app.py) as __main__
|
| 8 |
"""
|
|
@@ -10,18 +11,33 @@ import os
|
|
| 10 |
from pathlib import Path
|
| 11 |
import runpy
|
| 12 |
import sys
|
|
|
|
| 13 |
|
| 14 |
ROOT = Path(__file__).parent.resolve()
|
| 15 |
STREAMLIT_DIR = ROOT.joinpath(".streamlit")
|
| 16 |
|
| 17 |
# --- FORCE environment variables (overwrite any existing HF defaults) ---
|
| 18 |
-
# Use a repo-local .streamlit directory so Streamlit won't try to write to '/'
|
| 19 |
os.environ["XDG_CONFIG_HOME"] = str(STREAMLIT_DIR)
|
| 20 |
# Ensure HOME is a writable folder inside the repo as well
|
| 21 |
os.environ["HOME"] = str(ROOT)
|
| 22 |
# Also set STREAMLIT_RUNTIME_DIR (added safety)
|
| 23 |
os.environ.setdefault("STREAMLIT_RUNTIME_DIR", str(ROOT.joinpath(".streamlit", "runtime")))
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# 2) Create .streamlit and a minimal config.toml if it doesn't exist
|
| 26 |
STREAMLIT_DIR.mkdir(parents=True, exist_ok=True)
|
| 27 |
cfg = STREAMLIT_DIR.joinpath("config.toml")
|
|
@@ -41,6 +57,16 @@ if not cfg.exists():
|
|
| 41 |
runtime_dir = Path(os.environ.get("STREAMLIT_RUNTIME_DIR"))
|
| 42 |
runtime_dir.mkdir(parents=True, exist_ok=True)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
# --- ADD src directory to Python import path so app can import modules by name ---
|
| 45 |
SRC_DIR = str(ROOT.joinpath("src"))
|
| 46 |
if SRC_DIR not in sys.path:
|
|
@@ -51,7 +77,17 @@ if SRC_DIR not in sys.path:
|
|
| 51 |
print("streamlit_app launcher: XDG_CONFIG_HOME =", os.environ.get("XDG_CONFIG_HOME"))
|
| 52 |
print("streamlit_app launcher: HOME =", os.environ.get("HOME"))
|
| 53 |
print("streamlit_app launcher: STREAMLIT_RUNTIME_DIR =", os.environ.get("STREAMLIT_RUNTIME_DIR"))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
print("streamlit_app launcher: sys.path[0] =", sys.path[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
sys.stdout.flush()
|
| 56 |
|
| 57 |
# 4) Run your real Streamlit app script (runs as __main__)
|
|
|
|
| 3 |
Launcher for Hugging Face Spaces / Streamlit:
|
| 4 |
- force XDG_CONFIG_HOME/HOME to a writable folder in the repo
|
| 5 |
- create a minimal .streamlit/config.toml if missing
|
| 6 |
+
- ensure Hugging Face / transformers cache dirs are repo-local and writable
|
| 7 |
- add repo/src to sys.path so `from data_loader import ...` works
|
| 8 |
- run the actual app (src/app.py) as __main__
|
| 9 |
"""
|
|
|
|
| 11 |
from pathlib import Path
|
| 12 |
import runpy
|
| 13 |
import sys
|
| 14 |
+
import stat
|
| 15 |
|
| 16 |
ROOT = Path(__file__).parent.resolve()
|
| 17 |
STREAMLIT_DIR = ROOT.joinpath(".streamlit")
|
| 18 |
|
| 19 |
# --- FORCE environment variables (overwrite any existing HF defaults) ---
|
|
|
|
| 20 |
os.environ["XDG_CONFIG_HOME"] = str(STREAMLIT_DIR)
|
| 21 |
# Ensure HOME is a writable folder inside the repo as well
|
| 22 |
os.environ["HOME"] = str(ROOT)
|
| 23 |
# Also set STREAMLIT_RUNTIME_DIR (added safety)
|
| 24 |
os.environ.setdefault("STREAMLIT_RUNTIME_DIR", str(ROOT.joinpath(".streamlit", "runtime")))
|
| 25 |
|
| 26 |
+
# --- Setup repository-local cache for huggingface / transformers ---
|
| 27 |
+
# Put all caches under repo .cache so the process can create and write to them
|
| 28 |
+
CACHE_ROOT = ROOT.joinpath(".cache")
|
| 29 |
+
TF_CACHE = CACHE_ROOT.joinpath("transformers")
|
| 30 |
+
HF_HOME = CACHE_ROOT.joinpath("huggingface")
|
| 31 |
+
DATASETS_CACHE = CACHE_ROOT.joinpath("datasets")
|
| 32 |
+
|
| 33 |
+
# Set env vars used by HF libs
|
| 34 |
+
os.environ.setdefault("XDG_CACHE_HOME", str(CACHE_ROOT))
|
| 35 |
+
os.environ.setdefault("TRANSFORMERS_CACHE", str(TF_CACHE))
|
| 36 |
+
os.environ.setdefault("HF_HOME", str(HF_HOME))
|
| 37 |
+
os.environ.setdefault("HF_DATASETS_CACHE", str(DATASETS_CACHE))
|
| 38 |
+
# Optional: also set general cache-related vars
|
| 39 |
+
os.environ.setdefault("HF_METRICS_CACHE", str(CACHE_ROOT.joinpath("metrics")))
|
| 40 |
+
|
| 41 |
# 2) Create .streamlit and a minimal config.toml if it doesn't exist
|
| 42 |
STREAMLIT_DIR.mkdir(parents=True, exist_ok=True)
|
| 43 |
cfg = STREAMLIT_DIR.joinpath("config.toml")
|
|
|
|
| 57 |
runtime_dir = Path(os.environ.get("STREAMLIT_RUNTIME_DIR"))
|
| 58 |
runtime_dir.mkdir(parents=True, exist_ok=True)
|
| 59 |
|
| 60 |
+
# Make sure cache dirs exist and are world-writable (so HF download locks work)
|
| 61 |
+
for d in (CACHE_ROOT, TF_CACHE, HF_HOME, DATASETS_CACHE, runtime_dir):
|
| 62 |
+
try:
|
| 63 |
+
d.mkdir(parents=True, exist_ok=True)
|
| 64 |
+
# chmod 0o777 so non-root runtime user can create lock files & write
|
| 65 |
+
d.chmod(0o777)
|
| 66 |
+
except Exception:
|
| 67 |
+
# best-effort; continue (we will show diagnostics below)
|
| 68 |
+
pass
|
| 69 |
+
|
| 70 |
# --- ADD src directory to Python import path so app can import modules by name ---
|
| 71 |
SRC_DIR = str(ROOT.joinpath("src"))
|
| 72 |
if SRC_DIR not in sys.path:
|
|
|
|
| 77 |
print("streamlit_app launcher: XDG_CONFIG_HOME =", os.environ.get("XDG_CONFIG_HOME"))
|
| 78 |
print("streamlit_app launcher: HOME =", os.environ.get("HOME"))
|
| 79 |
print("streamlit_app launcher: STREAMLIT_RUNTIME_DIR =", os.environ.get("STREAMLIT_RUNTIME_DIR"))
|
| 80 |
+
print("streamlit_app launcher: XDG_CACHE_HOME =", os.environ.get("XDG_CACHE_HOME"))
|
| 81 |
+
print("streamlit_app launcher: TRANSFORMERS_CACHE =", os.environ.get("TRANSFORMERS_CACHE"))
|
| 82 |
+
print("streamlit_app launcher: HF_HOME =", os.environ.get("HF_HOME"))
|
| 83 |
+
print("streamlit_app launcher: HF_DATASETS_CACHE =", os.environ.get("HF_DATASETS_CACHE"))
|
| 84 |
print("streamlit_app launcher: sys.path[0] =", sys.path[0])
|
| 85 |
+
# show permission bits for the main cache dir
|
| 86 |
+
try:
|
| 87 |
+
st = CACHE_ROOT.stat()
|
| 88 |
+
print("streamlit_app launcher: .cache exists, mode:", oct(st.st_mode & 0o777))
|
| 89 |
+
except Exception:
|
| 90 |
+
print("streamlit_app launcher: .cache stat failed or missing")
|
| 91 |
sys.stdout.flush()
|
| 92 |
|
| 93 |
# 4) Run your real Streamlit app script (runs as __main__)
|