# nice-bill's picture
# initial commit
# cdb73a8
"""Centralized configuration for the book recommender application."""
import os
from pathlib import Path
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process
# environment. This must run before the os.getenv() lookups further down,
# otherwise values defined only in .env would not be seen by them.
load_dotenv()
# Project root: the fourth ancestor of this file's fully resolved path.
BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent

# Directory layout. Only DATA_DIR itself is created at import time; the
# "raw" and "processed" sub-directories are merely named here.
DATA_DIR = BASE_DIR.joinpath("data")
RAW_DATA_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DATA_DIR = DATA_DIR.joinpath("processed")

# Make sure the top-level data directory is present (no error if it already is).
DATA_DIR.mkdir(exist_ok=True, parents=True)

# File under the raw directory.
RAW_DATA_PATH = RAW_DATA_DIR.joinpath("books_prepared.csv")

# Files under the processed directory.
PROCESSED_DATA_PATH = PROCESSED_DATA_DIR.joinpath("books_cleaned.parquet")
EMBEDDINGS_PATH = PROCESSED_DATA_DIR.joinpath("book_embeddings.npy")
EMBEDDING_METADATA_PATH = PROCESSED_DATA_DIR.joinpath("embedding_metadata.json")
CLUSTERS_CACHE_PATH = PROCESSED_DATA_DIR.joinpath("cluster_cache.pkl")
def _env_str(name: str, default: str) -> str:
    """Return environment variable *name*, or *default* if unset or blank."""
    value = os.getenv(name, "").strip()
    return value or default


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an integer.

    An unset or blank variable (e.g. ``NAME=`` in a .env file) yields
    *default* instead of crashing at import time.

    Raises:
        ValueError: if the variable is set to a non-integer value; the
            message names the offending variable.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


# Embedding settings; model name and device can be overridden via environment.
EMBEDDING_MODEL = _env_str("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
# NOTE(review): this value is hard-coded and NOT derived from EMBEDDING_MODEL.
# 384 presumably matches the default model's output size; confirm and update
# it whenever EMBEDDING_MODEL is overridden with a different model.
EMBEDDING_DIMENSION = 384
EMBEDDING_DEVICE = _env_str("EMBEDDING_DEVICE", "cpu")

# Fixed defaults (not configurable through the environment).
DEFAULT_BATCH_SIZE = 64
DEFAULT_TOP_K = 10
MIN_SIMILARITY_THRESHOLD = 0.3

# Cluster count; overridable via environment, defaults to 50.
NUM_CLUSTERS = _env_int("NUM_CLUSTERS", 50)

APP_VERSION = "0.1.0"
FALLBACK_COVER_URL = "https://placehold.co/200x300/667eea/white?text=No+Cover"
# --- Data/Model Versioning (Future Consideration)
# For production systems, consider implementing a robust data and model
# versioning system (e.g., DVC - Data Version Control) to track changes
# to processed data and generated embeddings. For this MVP, manual
# management or timestamping of files is suggested if versioning is critical.