Spaces:
Sleeping
Sleeping
File size: 2,072 Bytes
8d8bf0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
from pathlib import Path
from typing import Union, Dict
# --- project roots ---
# Repository root: two levels above the directory that contains this file.
PROJECT_ROOT = Path(__file__).resolve().parents[2]

# Data tree: <root>/data/{raw,processed,cache}
DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DIR = DATA_DIR.joinpath("processed")
CACHE_DIR = DATA_DIR.joinpath("cache")

# Locations outside the data tree.
LOGS_DIR = PROJECT_ROOT.joinpath("logs")
MODELS_DIR = PROJECT_ROOT.joinpath("src", "models")
def ensure_dir(path: Union[str, Path]) -> Path:
    """
    Create ``path`` as a directory if it is missing and return it as a Path.

    A ``str`` argument is converted to ``pathlib.Path``; a ``Path`` argument
    is used (and returned) unchanged. Missing parent directories are created
    as well, and an already-existing directory is not treated as an error.
    """
    if isinstance(path, Path):
        directory = path
    else:
        directory = Path(path)
    directory.mkdir(parents=True, exist_ok=True)
    return directory
def get_raw_path(dataset: str) -> Path:
    """Return .../data/raw/<dataset>, creating the directory if it is missing."""
    raw_dataset_dir = RAW_DIR / dataset
    return ensure_dir(raw_dataset_dir)
def get_processed_path(dataset: str) -> Path:
    """Return .../data/processed/<dataset>, creating the directory if it is missing."""
    processed_dataset_dir = PROCESSED_DIR / dataset
    return ensure_dir(processed_dataset_dir)
def get_logs_path() -> Path:
    """Return .../logs, creating the directory if it is missing."""
    logs_dir = ensure_dir(LOGS_DIR)
    return logs_dir
def get_dataset_paths(dataset: str) -> Dict[str, Path]:
    """
    Build the mapping of well-known paths for one dataset.

    The dataset name is lower-cased before use. The "raw", "processed",
    "cache" and "logs" entries are directories and are created on disk if
    missing; every other entry is a file path inside the processed directory
    and is only computed, not created.

    NOTE: all values are Path objects (not strings) for consistency.
    """
    name = dataset.lower()
    processed_dir = get_processed_path(name)

    # Directory entries first, in the order callers see them in the mapping.
    paths: Dict[str, Path] = {
        "raw": get_raw_path(name),
        "processed": processed_dir,
        "cache": ensure_dir(CACHE_DIR / name),
        "logs": get_logs_path(),
    }

    # (mapping key, file name under the processed directory)
    processed_files = (
        # Parquet input files
        ("item_meta_emb_path", "item_meta_emb.parquet"),
        ("item_image_emb_path", "item_image_emb.parquet"),
        ("item_text_emb_path", "item_text_emb.parquet"),
        # FAISS-related npy features
        ("meta_features_path", "meta_features.npy"),
        ("text_features_path", "text_features.npy"),
        ("image_features_path", "image_features.npy"),
        ("labels_path", "labels.json"),
        # FAISS fusion output path
        ("faiss_fusion_path", "faiss_fusion.index"),
    )
    for key, filename in processed_files:
        paths[key] = processed_dir / filename

    return paths