"""Locate the optimized data spreadsheet.

The file is fetched from a Hugging Face dataset repository when the
``HF_TOKEN`` environment variable is set; otherwise (or when that download
fails) a local copy under ``data/`` is used.
"""

import logging
import os
from pathlib import Path

logger = logging.getLogger(__name__)

# Single source of truth for the locations used by both public functions.
HF_DATASET_REPO = "muhalwan/optimized_data_mhs"
HF_DATASET_FILENAME = "optimized_data.xlsx"
HF_CACHE_DIR = "./hf_cache"
LOCAL_DATA_PATH = "data/optimized_data.xlsx"


def load_data_file() -> str:
    """Return a filesystem path to the data spreadsheet.

    Resolution order:

    1. If ``HF_TOKEN`` is set (non-empty), download the file from the
       Hugging Face dataset ``HF_DATASET_REPO`` into ``HF_CACHE_DIR``.
    2. If no token is set, or the download fails for any reason (including
       ``huggingface_hub`` not being installed), fall back to
       ``LOCAL_DATA_PATH`` when that file exists.

    Returns:
        The path returned by ``hf_hub_download`` or the local relative path
        ``"data/optimized_data.xlsx"``.

    Raises:
        FileNotFoundError: No source could provide the file. When a download
            was attempted and failed, the original error is attached as
            ``__cause__`` and summarized in the message.
    """
    hf_token = os.getenv("HF_TOKEN")
    # Remembered so the final error can report why the HF path was unusable.
    hf_error = None

    if hf_token:
        try:
            # Imported lazily: huggingface_hub is only needed on this path,
            # and a missing package should fall back to the local file.
            from huggingface_hub import hf_hub_download

            logger.info("Dataset: %s", HF_DATASET_REPO)
            file_path = hf_hub_download(
                repo_id=HF_DATASET_REPO,
                filename=HF_DATASET_FILENAME,
                repo_type="dataset",
                token=hf_token,
                cache_dir=HF_CACHE_DIR,
            )
        except Exception as e:
            # Deliberately broad: any failure here is best-effort and must
            # not prevent the local fallback below.
            logger.error("Failed to download from HF dataset: %s", e)
            hf_error = e
        else:
            logger.info("Data loaded successfully from HF dataset")
            return file_path

    if Path(LOCAL_DATA_PATH).exists():
        logger.info("Loading data from local file: %s", LOCAL_DATA_PATH)
        return LOCAL_DATA_PATH

    if hf_error is not None:
        # A token was provided, so telling the user to "set HF_TOKEN" would
        # be misleading; surface the real download failure instead.
        raise FileNotFoundError(
            "No data source available. Download from HF dataset "
            f"'{HF_DATASET_REPO}' failed ({hf_error}) and no local file "
            f"exists at '{LOCAL_DATA_PATH}'"
        ) from hf_error

    raise FileNotFoundError(
        "No data source available. Either set HF_TOKEN environment variable "
        f"or place data file at '{LOCAL_DATA_PATH}'"
    )


def get_data_source_info() -> dict:
    """Describe which data sources are currently available.

    Returns:
        A dict with the keys ``hf_token_available``, ``local_file_available``,
        ``will_use_hf_dataset``, ``will_use_local``, ``dataset_repo`` and
        ``local_path``.

    Note:
        ``will_use_hf_dataset`` / ``will_use_local`` reflect the initial plan
        based on the environment only. ``load_data_file`` may still fall back
        to the local file when a token is set but the download fails.
    """
    has_token = bool(os.getenv("HF_TOKEN"))
    local_exists = Path(LOCAL_DATA_PATH).exists()

    return {
        "hf_token_available": has_token,
        "local_file_available": local_exists,
        "will_use_hf_dataset": has_token,
        "will_use_local": not has_token and local_exists,
        "dataset_repo": HF_DATASET_REPO if has_token else None,
        "local_path": LOCAL_DATA_PATH if local_exists else None,
    }


def main() -> None:
    """Print the data-source summary and attempt to resolve the data file."""
    logging.basicConfig(level=logging.INFO)

    print("Data Information")
    info = get_data_source_info()
    for key, value in info.items():
        print(f"  {key}: {value}")

    try:
        file_path = load_data_file()
        print(f"\nSuccess! Data file: {file_path}")
    except Exception as e:
        # Top-level boundary of the diagnostic script: report, don't crash.
        print(f"\nFailed: {e}")


if __name__ == "__main__":
    main()