Eric Xu committed on
Use /data/nemotron for persistent storage on HF Spaces
Browse files- web/app.py +3 -2
web/app.py
CHANGED
|
@@ -78,6 +78,7 @@ _nemotron_ds = None
|
|
| 78 |
_nemotron_checked = False
|
| 79 |
|
| 80 |
NEMOTRON_SEARCH_PATHS = [
|
|
|
|
| 81 |
PROJECT_ROOT / "data" / "nemotron",
|
| 82 |
Path.home() / "data" / "nvidia" / "Nemotron-Personas-USA",
|
| 83 |
Path.home() / "data" / "nemotron",
|
|
@@ -255,7 +256,7 @@ class SuggestChangesInput(BaseModel):
|
|
| 255 |
|
| 256 |
|
| 257 |
class NemotronPathInput(BaseModel):
|
| 258 |
-
path: str = "data/nemotron"
|
| 259 |
dataset: str = "USA"
|
| 260 |
|
| 261 |
|
|
@@ -264,7 +265,7 @@ async def setup_nemotron(input: NemotronPathInput):
|
|
| 264 |
"""Point to existing data, or download a Nemotron dataset to the given path."""
|
| 265 |
p = Path(input.path).expanduser().resolve()
|
| 266 |
# Prevent path traversal — must be within project or /tmp
|
| 267 |
-
if not (p.is_relative_to(PROJECT_ROOT) or p.is_relative_to(Path("/tmp"))):
|
| 268 |
raise HTTPException(403, "Path must be within the project directory")
|
| 269 |
hf_name = NEMOTRON_DATASETS.get(input.dataset, NEMOTRON_DATASETS["USA"])
|
| 270 |
|
|
|
|
| 78 |
_nemotron_checked = False
|
| 79 |
|
| 80 |
NEMOTRON_SEARCH_PATHS = [
|
| 81 |
+
Path("/data/nemotron"), # HF Spaces persistent storage
|
| 82 |
PROJECT_ROOT / "data" / "nemotron",
|
| 83 |
Path.home() / "data" / "nvidia" / "Nemotron-Personas-USA",
|
| 84 |
Path.home() / "data" / "nemotron",
|
|
|
|
| 256 |
|
| 257 |
|
| 258 |
class NemotronPathInput(BaseModel):
|
| 259 |
+
path: str = "/data/nemotron" if IS_SPACES else "data/nemotron"
|
| 260 |
dataset: str = "USA"
|
| 261 |
|
| 262 |
|
|
|
|
| 265 |
"""Point to existing data, or download a Nemotron dataset to the given path."""
|
| 266 |
p = Path(input.path).expanduser().resolve()
|
| 267 |
# Prevent path traversal — must be within project, /tmp, or /data (HF Spaces persistent storage)
|
| 268 |
+
if not (p.is_relative_to(PROJECT_ROOT) or p.is_relative_to(Path("/tmp")) or p.is_relative_to(Path("/data"))):
|
| 269 |
raise HTTPException(403, "Path must be within the project directory")
|
| 270 |
hf_name = NEMOTRON_DATASETS.get(input.dataset, NEMOTRON_DATASETS["USA"])
|
| 271 |
|