Spaces:
Sleeping
Sleeping
Rachel Ding committed on
Commit ·
25c33f6
1
Parent(s): 349e3bf
Speed up: cache repo file list; Dasheng dropdown exclude fold*
Browse files - dataset_loader.py +17 -5
dataset_loader.py
CHANGED
|
@@ -16,10 +16,21 @@ REPO_TYPE = "dataset"
|
|
| 16 |
ROOT_PREFIX = "batch_outputs/"
|
| 17 |
DASHENG_PREFIX = "batch_outputs_dasheng/"
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def _get_sample_ids(prefix: str = ROOT_PREFIX) -> list[str]:
|
| 21 |
"""List sample IDs (e.g. 07_003277) under given prefix in repo."""
|
| 22 |
-
files =
|
| 23 |
seen = set()
|
| 24 |
pat = re.escape(prefix.rstrip("/")) + r"/([^/]+)/"
|
| 25 |
for f in files:
|
|
@@ -62,13 +73,14 @@ def list_samples() -> list[str]:
|
|
| 62 |
|
| 63 |
|
| 64 |
def list_samples_dasheng() -> list[str]:
|
| 65 |
-
"""Return only sample IDs
|
| 66 |
-
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
def _find_files(inner: str) -> list[str]:
|
| 70 |
-
"""List all repo files under inner path."""
|
| 71 |
-
files =
|
| 72 |
return [f for f in files if f.startswith(inner + "/")]
|
| 73 |
|
| 74 |
|
|
|
|
| 16 |
ROOT_PREFIX = "batch_outputs/"
|
| 17 |
DASHENG_PREFIX = "batch_outputs_dasheng/"
|
| 18 |
|
| 19 |
+
# Cache full repo file list so we only call list_repo_files once per process (major speedup)
|
| 20 |
+
_cached_repo_files: Optional[list[str]] = None
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _get_repo_files() -> list[str]:
|
| 24 |
+
"""Return full list of repo file paths, cached after first call."""
|
| 25 |
+
global _cached_repo_files
|
| 26 |
+
if _cached_repo_files is None:
|
| 27 |
+
_cached_repo_files = list_repo_files(REPO_ID, repo_type=REPO_TYPE)
|
| 28 |
+
return _cached_repo_files
|
| 29 |
+
|
| 30 |
|
| 31 |
def _get_sample_ids(prefix: str = ROOT_PREFIX) -> list[str]:
|
| 32 |
"""List sample IDs (e.g. 07_003277) under given prefix in repo."""
|
| 33 |
+
files = _get_repo_files()
|
| 34 |
seen = set()
|
| 35 |
pat = re.escape(prefix.rstrip("/")) + r"/([^/]+)/"
|
| 36 |
for f in files:
|
|
|
|
| 73 |
|
| 74 |
|
| 75 |
def list_samples_dasheng() -> list[str]:
|
| 76 |
+
"""Return only sample IDs for Dasheng view: from batch_outputs_dasheng, excluding fold* (UrbanSound8k)."""
|
| 77 |
+
ids = _get_sample_ids(DASHENG_PREFIX)
|
| 78 |
+
return sorted([x for x in ids if not x.startswith("fold")])
|
| 79 |
|
| 80 |
|
| 81 |
def _find_files(inner: str) -> list[str]:
|
| 82 |
+
"""List all repo files under inner path (uses cached repo file list)."""
|
| 83 |
+
files = _get_repo_files()
|
| 84 |
return [f for f in files if f.startswith(inner + "/")]
|
| 85 |
|
| 86 |
|