"""Centralized configuration for the book recommender application.""" import os from pathlib import Path from dotenv import load_dotenv load_dotenv() BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent DATA_DIR = BASE_DIR / "data" # Ensure data directory exists DATA_DIR.mkdir(parents=True, exist_ok=True) RAW_DATA_DIR = DATA_DIR / "raw" PROCESSED_DATA_DIR = DATA_DIR / "processed" RAW_DATA_PATH = RAW_DATA_DIR / "books_prepared.csv" PROCESSED_DATA_PATH = PROCESSED_DATA_DIR / "books_cleaned.parquet" EMBEDDINGS_PATH = PROCESSED_DATA_DIR / "book_embeddings.npy" EMBEDDING_METADATA_PATH = PROCESSED_DATA_DIR / "embedding_metadata.json" CLUSTERS_CACHE_PATH = PROCESSED_DATA_DIR / "cluster_cache.pkl" EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2") EMBEDDING_DIMENSION = 384 EMBEDDING_DEVICE = os.getenv("EMBEDDING_DEVICE", "cpu") DEFAULT_BATCH_SIZE = 64 DEFAULT_TOP_K = 10 MIN_SIMILARITY_THRESHOLD = 0.3 NUM_CLUSTERS = int(os.getenv("NUM_CLUSTERS", "50")) APP_VERSION = "0.1.0" FALLBACK_COVER_URL = "https://placehold.co/200x300/667eea/white?text=No+Cover" # --- Data/Model Versioning (Future Consideration) # For production systems, consider implementing a robust data and model # versioning system (e.g., DVC - Data Version Control) to track changes # to processed data and generated embeddings. For this MVP, manual # management or timestamping of files is suggested if versioning is critical.