dasdebanna committed on
Commit
f80343c
·
1 Parent(s): f978fb1

Launcher: force repo-local HF/transformers caches and chmod; fix runtime dirs

Browse files
Files changed (1) hide show
  1. streamlit_app.py +73 -49
streamlit_app.py CHANGED
@@ -4,76 +4,100 @@ Launcher for Hugging Face Spaces / Streamlit:
4
  - force XDG_CONFIG_HOME/HOME to a writable folder in the repo
5
  - create a minimal .streamlit/config.toml if missing
6
  - ensure Hugging Face / transformers cache dirs are repo-local and writable
7
- - add repo/src to sys.path so `from data_loader import ...` works
8
  - run the actual app (src/app.py) as __main__
 
 
 
 
9
  """
10
  import os
11
  from pathlib import Path
12
  import runpy
13
  import sys
14
- import stat
15
 
16
  ROOT = Path(__file__).parent.resolve()
17
  STREAMLIT_DIR = ROOT.joinpath(".streamlit")
18
 
19
- # --- FORCE environment variables (overwrite any existing HF defaults) ---
 
 
 
20
  os.environ["XDG_CONFIG_HOME"] = str(STREAMLIT_DIR)
21
  # Ensure HOME is a writable folder inside the repo as well
22
  os.environ["HOME"] = str(ROOT)
23
- # Also set STREAMLIT_RUNTIME_DIR (added safety)
24
- os.environ.setdefault("STREAMLIT_RUNTIME_DIR", str(ROOT.joinpath(".streamlit", "runtime")))
25
 
26
- # --- Setup repository-local cache for huggingface / transformers ---
27
- # Put all caches under repo .cache so the process can create and write to them
28
  CACHE_ROOT = ROOT.joinpath(".cache")
 
 
29
  TF_CACHE = CACHE_ROOT.joinpath("transformers")
 
30
  HF_HOME = CACHE_ROOT.joinpath("huggingface")
 
31
  DATASETS_CACHE = CACHE_ROOT.joinpath("datasets")
32
-
33
- # Set env vars used by HF libs
34
- os.environ.setdefault("XDG_CACHE_HOME", str(CACHE_ROOT))
35
- os.environ.setdefault("TRANSFORMERS_CACHE", str(TF_CACHE))
36
- os.environ.setdefault("HF_HOME", str(HF_HOME))
37
- os.environ.setdefault("HF_DATASETS_CACHE", str(DATASETS_CACHE))
38
- # Optional: also set general cache-related vars
39
  os.environ.setdefault("HF_METRICS_CACHE", str(CACHE_ROOT.joinpath("metrics")))
40
 
41
- # 2) Create .streamlit and a minimal config.toml if it doesn't exist
42
- STREAMLIT_DIR.mkdir(parents=True, exist_ok=True)
43
- cfg = STREAMLIT_DIR.joinpath("config.toml")
44
- if not cfg.exists():
45
- cfg.write_text(
46
- "[server]\n"
47
- "headless = true\n"
48
- "port = 8501\n"
49
- "enableCORS = false\n"
50
- "enableWebsocketCompression = false\n\n"
51
- "[browser]\n"
52
- "gatherUsageStats = false\n",
53
- encoding="utf-8",
54
- )
55
 
56
- # Make sure runtime dir exists
57
- runtime_dir = Path(os.environ.get("STREAMLIT_RUNTIME_DIR"))
58
- runtime_dir.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
59
 
60
- # Make sure cache dirs exist and are world-writable (so HF download locks work)
61
- for d in (CACHE_ROOT, TF_CACHE, HF_HOME, DATASETS_CACHE, runtime_dir):
 
 
 
62
  try:
63
- d.mkdir(parents=True, exist_ok=True)
64
- # chmod 0o777 so non-root runtime user can create lock files & write
65
- d.chmod(0o777)
66
- except Exception:
67
- # best-effort; continue (we will show diagnostics below)
68
- pass
 
 
 
 
 
 
69
 
70
- # --- ADD src directory to Python import path so app can import modules by name ---
 
 
71
  SRC_DIR = str(ROOT.joinpath("src"))
72
  if SRC_DIR not in sys.path:
73
- # Insert at front so local src overrides other packages with same names
74
  sys.path.insert(0, SRC_DIR)
75
 
76
- # 3) Diagnostic prints for logs (helps debug in Space logs)
 
 
 
 
 
 
 
 
 
77
  print("streamlit_app launcher: XDG_CONFIG_HOME =", os.environ.get("XDG_CONFIG_HOME"))
78
  print("streamlit_app launcher: HOME =", os.environ.get("HOME"))
79
  print("streamlit_app launcher: STREAMLIT_RUNTIME_DIR =", os.environ.get("STREAMLIT_RUNTIME_DIR"))
@@ -82,13 +106,13 @@ print("streamlit_app launcher: TRANSFORMERS_CACHE =", os.environ.get("TRANSFORME
82
  print("streamlit_app launcher: HF_HOME =", os.environ.get("HF_HOME"))
83
  print("streamlit_app launcher: HF_DATASETS_CACHE =", os.environ.get("HF_DATASETS_CACHE"))
84
  print("streamlit_app launcher: sys.path[0] =", sys.path[0])
85
- # show permission bits for the main cache dir
86
- try:
87
- st = CACHE_ROOT.stat()
88
- print("streamlit_app launcher: .cache exists, mode:", oct(st.st_mode & 0o777))
89
- except Exception:
90
- print("streamlit_app launcher: .cache stat failed or missing")
91
  sys.stdout.flush()
92
 
93
- # 4) Run your real Streamlit app script (runs as __main__)
 
 
94
  runpy.run_path(str(ROOT.joinpath("src", "app.py")), run_name="__main__")
 
4
  - force XDG_CONFIG_HOME/HOME to a writable folder in the repo
5
  - create a minimal .streamlit/config.toml if missing
6
  - ensure Hugging Face / transformers cache dirs are repo-local and writable
7
+ - add repo/src to sys.path so imports like `from data_loader import ...` work
8
  - run the actual app (src/app.py) as __main__
9
+ Notes:
10
+ - We *assign* env vars (not setdefault) to ensure they are effective even if
11
+ the environment was pre-populated earlier by the runtime (HF_METRICS_CACHE is the exception: it still uses setdefault).
12
+ - We chmod cache dirs to 0o777 so non-root container user can create locks/write.
13
  """
14
  import os
15
  from pathlib import Path
16
  import runpy
17
  import sys
 
18
 
19
  ROOT = Path(__file__).parent.resolve()
20
  STREAMLIT_DIR = ROOT.joinpath(".streamlit")
21
 
22
+ # ------------------------------
23
+ # Force environment variables (overwrite any existing HF / Streamlit defaults)
24
+ # ------------------------------
25
+ # Use a repo-local .streamlit directory so Streamlit won't try to write to '/'
26
  os.environ["XDG_CONFIG_HOME"] = str(STREAMLIT_DIR)
27
  # Ensure HOME is a writable folder inside the repo as well
28
  os.environ["HOME"] = str(ROOT)
29
+ # Force STREAMLIT_RUNTIME_DIR to repo-local runtime (overwrite anything)
30
+ os.environ["STREAMLIT_RUNTIME_DIR"] = str(ROOT.joinpath(".streamlit", "runtime"))
31
 
32
+ # Set repo-local caches for HF libs and transformers (overwrite any existing)
 
33
  CACHE_ROOT = ROOT.joinpath(".cache")
34
+ os.environ["XDG_CACHE_HOME"] = str(CACHE_ROOT)
35
+ # transformers prefers TRANSFORMERS_CACHE (deprecated) or HF_HOME; set both for compatibility
36
  TF_CACHE = CACHE_ROOT.joinpath("transformers")
37
+ os.environ["TRANSFORMERS_CACHE"] = str(TF_CACHE)
38
  HF_HOME = CACHE_ROOT.joinpath("huggingface")
39
+ os.environ["HF_HOME"] = str(HF_HOME)
40
  DATASETS_CACHE = CACHE_ROOT.joinpath("datasets")
41
+ os.environ["HF_DATASETS_CACHE"] = str(DATASETS_CACHE)
42
+ # optional metric cache
 
 
 
 
 
43
  os.environ.setdefault("HF_METRICS_CACHE", str(CACHE_ROOT.joinpath("metrics")))
44
 
45
+ # ------------------------------
46
+ # Create config / runtime / cache directories and make them writable
47
+ # ------------------------------
48
def ensure_dir_and_chmod(p: Path, mode: int = 0o777) -> None:
    """Create directory *p* (including parents) and set its permission bits.

    This is a best-effort helper for the launcher: a filesystem failure is
    printed to stdout and swallowed so that start-up can continue.

    Args:
        p: Directory to create. An already existing directory is accepted.
        mode: Permission bits applied with ``chmod`` after creation. Only the
            leaf directory is chmod-ed; parents created on the way keep the
            permissions ``mkdir`` gave them.
    """
    try:
        p.mkdir(parents=True, exist_ok=True)
        # mkdir's own mode argument is filtered by the process umask, so the
        # requested bits are applied explicitly afterwards.
        p.chmod(mode)
    except OSError as e:
        # best-effort; continue and print diagnostics
        print(f"streamlit_app launcher: ensure_dir_and_chmod failed for {p}: {e}")
 
 
 
 
55
 
56
+ # Create important directories
57
# Create every directory the launcher relies on and apply the default mode.
# ensure_dir_and_chmod reports failures by printing instead of raising, so a
# directory that cannot be prepared does not stop the launcher here.
# STREAMLIT_RUNTIME_DIR was assigned earlier in this file, so indexing
# os.environ directly is safe and keeps the path defined in one place; the
# runtime directory is therefore listed once instead of twice.
_runtime_dir = Path(os.environ["STREAMLIT_RUNTIME_DIR"])
for _directory in (
    STREAMLIT_DIR,
    _runtime_dir,
    CACHE_ROOT,
    TF_CACHE,
    HF_HOME,
    DATASETS_CACHE,
):
    ensure_dir_and_chmod(_directory)
64
 
65
+ # ------------------------------
66
+ # Create a minimal streamlit config if missing
67
+ # ------------------------------
68
+ cfg = STREAMLIT_DIR.joinpath("config.toml")
69
+ if not cfg.exists():
70
  try:
71
+ cfg.write_text(
72
+ "[server]\n"
73
+ "headless = true\n"
74
+ "port = 8501\n"
75
+ "enableCORS = false\n"
76
+ "enableWebsocketCompression = false\n\n"
77
+ "[browser]\n"
78
+ "gatherUsageStats = false\n",
79
+ encoding="utf-8",
80
+ )
81
+ except Exception as e:
82
+ print("streamlit_app launcher: failed to write config.toml:", e)
83
 
84
+ # ------------------------------
85
+ # Add src directory to Python import path so app can import modules by name
86
+ # ------------------------------
87
  SRC_DIR = str(ROOT.joinpath("src"))
88
  if SRC_DIR not in sys.path:
 
89
  sys.path.insert(0, SRC_DIR)
90
 
91
+ # ------------------------------
92
+ # Print diagnostics (critical - will show in HF container logs)
93
+ # ------------------------------
94
def stat_mode(p: Path) -> str:
    """Return the permission bits of *p* as an octal string, e.g. ``"0o777"``.

    Used only for the diagnostic prints of the launcher.

    Args:
        p: Path to inspect.

    Returns:
        ``oct(st_mode & 0o777)`` for the path, or the literal ``"MISSING"``
        when ``stat()`` fails. Note that this covers every filesystem error
        (for example a permission problem on a parent directory), not only a
        path that does not exist.
    """
    try:
        s = p.stat()
    except OSError:
        return "MISSING"
    # Mask off the file-type bits so only the rwx permission bits remain.
    return oct(s.st_mode & 0o777)
100
+
101
  print("streamlit_app launcher: XDG_CONFIG_HOME =", os.environ.get("XDG_CONFIG_HOME"))
102
  print("streamlit_app launcher: HOME =", os.environ.get("HOME"))
103
  print("streamlit_app launcher: STREAMLIT_RUNTIME_DIR =", os.environ.get("STREAMLIT_RUNTIME_DIR"))
 
106
  print("streamlit_app launcher: HF_HOME =", os.environ.get("HF_HOME"))
107
  print("streamlit_app launcher: HF_DATASETS_CACHE =", os.environ.get("HF_DATASETS_CACHE"))
108
  print("streamlit_app launcher: sys.path[0] =", sys.path[0])
109
+ print(".cache dir mode:", stat_mode(CACHE_ROOT))
110
+ print("transformers cache mode:", stat_mode(TF_CACHE))
111
+ print("hf home mode:", stat_mode(HF_HOME))
112
+ print("datasets cache mode:", stat_mode(DATASETS_CACHE))
 
 
113
  sys.stdout.flush()
114
 
115
+ # ------------------------------
116
+ # Run the real app
117
+ # ------------------------------
118
  runpy.run_path(str(ROOT.joinpath("src", "app.py")), run_name="__main__")