dasdebanna committed on
Commit
f978fb1
·
1 Parent(s): 118bb5c

Launcher: ensure repo-local HF/transformers cache directories are created and writable

Browse files
Files changed (1) hide show
  1. streamlit_app.py +37 -1
streamlit_app.py CHANGED
@@ -3,6 +3,7 @@
3
  Launcher for Hugging Face Spaces / Streamlit:
4
  - force XDG_CONFIG_HOME/HOME to a writable folder in the repo
5
  - create a minimal .streamlit/config.toml if missing
 
6
  - add repo/src to sys.path so `from data_loader import ...` works
7
  - run the actual app (src/app.py) as __main__
8
  """
@@ -10,18 +11,33 @@ import os
10
  from pathlib import Path
11
  import runpy
12
  import sys
 
13
 
14
  ROOT = Path(__file__).parent.resolve()
15
  STREAMLIT_DIR = ROOT.joinpath(".streamlit")
16
 
17
  # --- FORCE environment variables (overwrite any existing HF defaults) ---
18
- # Use a repo-local .streamlit directory so Streamlit won't try to write to '/'
19
  os.environ["XDG_CONFIG_HOME"] = str(STREAMLIT_DIR)
20
  # Ensure HOME is a writable folder inside the repo as well
21
  os.environ["HOME"] = str(ROOT)
22
  # Also set STREAMLIT_RUNTIME_DIR (added safety)
23
  os.environ.setdefault("STREAMLIT_RUNTIME_DIR", str(ROOT.joinpath(".streamlit", "runtime")))
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # 2) Create .streamlit and a minimal config.toml if it doesn't exist
26
  STREAMLIT_DIR.mkdir(parents=True, exist_ok=True)
27
  cfg = STREAMLIT_DIR.joinpath("config.toml")
@@ -41,6 +57,16 @@ if not cfg.exists():
41
  runtime_dir = Path(os.environ.get("STREAMLIT_RUNTIME_DIR"))
42
  runtime_dir.mkdir(parents=True, exist_ok=True)
43
 
 
 
 
 
 
 
 
 
 
 
44
  # --- ADD src directory to Python import path so app can import modules by name ---
45
  SRC_DIR = str(ROOT.joinpath("src"))
46
  if SRC_DIR not in sys.path:
@@ -51,7 +77,17 @@ if SRC_DIR not in sys.path:
51
  print("streamlit_app launcher: XDG_CONFIG_HOME =", os.environ.get("XDG_CONFIG_HOME"))
52
  print("streamlit_app launcher: HOME =", os.environ.get("HOME"))
53
  print("streamlit_app launcher: STREAMLIT_RUNTIME_DIR =", os.environ.get("STREAMLIT_RUNTIME_DIR"))
 
 
 
 
54
  print("streamlit_app launcher: sys.path[0] =", sys.path[0])
 
 
 
 
 
 
55
  sys.stdout.flush()
56
 
57
  # 4) Run your real Streamlit app script (runs as __main__)
 
3
  Launcher for Hugging Face Spaces / Streamlit:
4
  - force XDG_CONFIG_HOME/HOME to a writable folder in the repo
5
  - create a minimal .streamlit/config.toml if missing
6
+ - ensure Hugging Face / transformers cache dirs are repo-local and writable
7
  - add repo/src to sys.path so `from data_loader import ...` works
8
  - run the actual app (src/app.py) as __main__
9
  """
 
11
  from pathlib import Path
12
  import runpy
13
  import sys
14
+ import stat
15
 
16
  ROOT = Path(__file__).parent.resolve()
17
  STREAMLIT_DIR = ROOT.joinpath(".streamlit")
18
 
19
  # --- FORCE environment variables (overwrite any existing HF defaults) ---
 
20
  os.environ["XDG_CONFIG_HOME"] = str(STREAMLIT_DIR)
21
  # Ensure HOME is a writable folder inside the repo as well
22
  os.environ["HOME"] = str(ROOT)
23
  # Also set STREAMLIT_RUNTIME_DIR (added safety)
24
  os.environ.setdefault("STREAMLIT_RUNTIME_DIR", str(ROOT.joinpath(".streamlit", "runtime")))
25
 
26
+ # --- Setup repository-local cache for huggingface / transformers ---
27
+ # Put all caches under repo .cache so the process can create and write to them
28
+ CACHE_ROOT = ROOT.joinpath(".cache")
29
+ TF_CACHE = CACHE_ROOT.joinpath("transformers")
30
+ HF_HOME = CACHE_ROOT.joinpath("huggingface")
31
+ DATASETS_CACHE = CACHE_ROOT.joinpath("datasets")
32
+
33
+ # Set env vars used by HF libs
34
+ os.environ.setdefault("XDG_CACHE_HOME", str(CACHE_ROOT))
35
+ os.environ.setdefault("TRANSFORMERS_CACHE", str(TF_CACHE))
36
+ os.environ.setdefault("HF_HOME", str(HF_HOME))
37
+ os.environ.setdefault("HF_DATASETS_CACHE", str(DATASETS_CACHE))
38
+ # Optional: also keep the HF metrics cache under the repo-local cache root
39
+ os.environ.setdefault("HF_METRICS_CACHE", str(CACHE_ROOT.joinpath("metrics")))
40
+
41
  # 2) Create .streamlit and a minimal config.toml if it doesn't exist
42
  STREAMLIT_DIR.mkdir(parents=True, exist_ok=True)
43
  cfg = STREAMLIT_DIR.joinpath("config.toml")
 
57
  runtime_dir = Path(os.environ.get("STREAMLIT_RUNTIME_DIR"))
58
  runtime_dir.mkdir(parents=True, exist_ok=True)
59
 
60
+ # Make sure cache dirs exist and are world-writable (so HF download locks work)
61
+ for d in (CACHE_ROOT, TF_CACHE, HF_HOME, DATASETS_CACHE, runtime_dir):
62
+ try:
63
+ d.mkdir(parents=True, exist_ok=True)
64
+ # chmod 0o777 so non-root runtime user can create lock files & write
65
+ d.chmod(0o777)
66
+ except Exception:
67
+ # best-effort; continue (we will show diagnostics below)
68
+ pass
69
+
70
  # --- ADD src directory to Python import path so app can import modules by name ---
71
  SRC_DIR = str(ROOT.joinpath("src"))
72
  if SRC_DIR not in sys.path:
 
77
  print("streamlit_app launcher: XDG_CONFIG_HOME =", os.environ.get("XDG_CONFIG_HOME"))
78
  print("streamlit_app launcher: HOME =", os.environ.get("HOME"))
79
  print("streamlit_app launcher: STREAMLIT_RUNTIME_DIR =", os.environ.get("STREAMLIT_RUNTIME_DIR"))
80
+ print("streamlit_app launcher: XDG_CACHE_HOME =", os.environ.get("XDG_CACHE_HOME"))
81
+ print("streamlit_app launcher: TRANSFORMERS_CACHE =", os.environ.get("TRANSFORMERS_CACHE"))
82
+ print("streamlit_app launcher: HF_HOME =", os.environ.get("HF_HOME"))
83
+ print("streamlit_app launcher: HF_DATASETS_CACHE =", os.environ.get("HF_DATASETS_CACHE"))
84
  print("streamlit_app launcher: sys.path[0] =", sys.path[0])
85
+ # show permission bits for the main cache dir
86
+ try:
87
+ st = CACHE_ROOT.stat()
88
+ print("streamlit_app launcher: .cache exists, mode:", oct(st.st_mode & 0o777))
89
+ except Exception:
90
+ print("streamlit_app launcher: .cache stat failed or missing")
91
  sys.stdout.flush()
92
 
93
  # 4) Run your real Streamlit app script (runs as __main__)