cove-ui / src /utils /paths.py
mickey1976's picture
Upload working Gradio UI for CoVE
8d8bf0e
from pathlib import Path
from typing import Union, Dict
# --- project roots ---
# Third ancestor of this file's resolved location (parents[0] is the directory
# that contains the file). Every other directory constant hangs off this one.
PROJECT_ROOT = Path(__file__).resolve().parents[2]

# Data tree: <root>/data/{raw,processed,cache}
DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DIR = DATA_DIR.joinpath("processed")
CACHE_DIR = DATA_DIR.joinpath("cache")

# <root>/logs
LOGS_DIR = PROJECT_ROOT.joinpath("logs")

# <root>/src/models
MODELS_DIR = PROJECT_ROOT.joinpath("src", "models")
def ensure_dir(path: Union[str, Path]) -> Path:
    """
    Create ``path`` as a directory, including missing parents, and return it.

    A ``pathlib.Path`` argument is used (and returned) as-is; any other value
    is wrapped in ``Path`` first. A directory that already exists is left
    alone rather than treated as an error.
    """
    if isinstance(path, Path):
        directory = path
    else:
        directory = Path(path)
    directory.mkdir(parents=True, exist_ok=True)
    return directory
def get_raw_path(dataset: str) -> Path:
    """Return .../data/raw/<dataset>, creating the directory if needed."""
    raw_dataset_dir = RAW_DIR / dataset
    return ensure_dir(raw_dataset_dir)
def get_processed_path(dataset: str) -> Path:
    """Return .../data/processed/<dataset>, creating the directory if needed."""
    processed_dataset_dir = PROCESSED_DIR / dataset
    return ensure_dir(processed_dataset_dir)
def get_logs_path() -> Path:
    """Return .../logs, creating the directory if needed."""
    logs_dir = ensure_dir(LOGS_DIR)
    return logs_dir
def get_dataset_paths(dataset: str) -> Dict[str, Path]:
    """
    Build the mapping of well-known paths for one dataset.

    The dataset name is lower-cased before use. The "raw", "processed",
    "cache" and "logs" directories are created if missing; the remaining
    entries are file paths inside the processed directory and are only
    computed here, not created.

    All values are Path objects (not strings).
    """
    name = dataset.lower()
    processed = get_processed_path(name)

    # Directory entries (each one is created on demand).
    paths: Dict[str, Path] = {
        "raw": get_raw_path(name),
        "processed": processed,
        "cache": ensure_dir(CACHE_DIR / name),
        "logs": get_logs_path(),
    }

    # File entries, all located directly under the processed directory.
    artifact_files = (
        # Parquet input files
        ("item_meta_emb_path", "item_meta_emb.parquet"),
        ("item_image_emb_path", "item_image_emb.parquet"),
        ("item_text_emb_path", "item_text_emb.parquet"),
        # FAISS-related npy features
        ("meta_features_path", "meta_features.npy"),
        ("text_features_path", "text_features.npy"),
        ("image_features_path", "image_features.npy"),
        ("labels_path", "labels.json"),
        # FAISS fusion output path
        ("faiss_fusion_path", "faiss_fusion.index"),
    )
    for key, filename in artifact_files:
        paths[key] = processed / filename

    return paths