Spaces:
Sleeping
Sleeping
| import logging | |
| import os | |
| from pathlib import Path | |
logger = logging.getLogger(__name__)


def load_data_file() -> str:
    """Return a filesystem path to the ``optimized_data.xlsx`` workbook.

    Sources are tried in this order:

    1. If the ``HF_TOKEN`` environment variable is set to a non-empty value,
       ``optimized_data.xlsx`` is requested from the
       ``muhalwan/optimized_data_mhs`` dataset repository via
       ``huggingface_hub.hf_hub_download`` (with ``cache_dir="./hf_cache"``).
    2. If no token is set, or the download attempt fails for any reason, the
       local file ``data/optimized_data.xlsx`` (relative to the current
       working directory) is used when it exists.

    Returns:
        The value returned by ``hf_hub_download``, or the relative local path
        ``"data/optimized_data.xlsx"``.

    Raises:
        FileNotFoundError: If neither source yields a file. When a download
            was attempted and failed, that failure is attached as
            ``__cause__`` so the real reason is not lost.
    """
    # Remembered so a failed download can be reported as the cause if the
    # local fallback is missing as well.
    download_error = None

    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        try:
            # Imported lazily: the local-file path must keep working when
            # huggingface_hub is not installed.
            from huggingface_hub import hf_hub_download

            logger.info("Dataset: muhalwan/optimized_data_mhs")
            file_path = hf_hub_download(
                repo_id="muhalwan/optimized_data_mhs",
                filename="optimized_data.xlsx",
                repo_type="dataset",
                token=hf_token,
                cache_dir="./hf_cache",
            )
            logger.info("Data loaded successfully from HF dataset")
            return file_path
        except Exception as e:
            # Deliberately broad: any download problem (missing package,
            # network, auth) falls through to the local file.
            logger.error("Failed to download from HF dataset: %s", e)
            download_error = e

    local_path = "data/optimized_data.xlsx"
    if Path(local_path).exists():
        logger.info(f"Loading data from local file: {local_path}")
        return local_path

    raise FileNotFoundError(
        "No data source available. Either set HF_TOKEN environment variable "
        "or place data file at 'data/optimized_data.xlsx'"
    ) from download_error
def get_data_source_info() -> dict:
    """Describe which data sources are currently available.

    Only the ``HF_TOKEN`` environment variable and the existence of
    ``data/optimized_data.xlsx`` (relative to the working directory) are
    inspected; nothing is downloaded or opened.

    Returns:
        A dict with the keys ``hf_token_available``, ``local_file_available``,
        ``will_use_hf_dataset``, ``will_use_local``, ``dataset_repo`` and
        ``local_path``. The first four are booleans; the last two hold the
        repository id / local path when that source is available, else
        ``None``.
    """
    token_present = bool(os.getenv("HF_TOKEN"))
    local_file = "data/optimized_data.xlsx"
    has_local = Path(local_file).exists()

    # Built key by key so the insertion order stays the same for callers
    # that iterate over the result.
    report = {}
    report["hf_token_available"] = token_present
    report["local_file_available"] = has_local
    report["will_use_hf_dataset"] = token_present
    # The local file is reported as the chosen source only when no token is set.
    report["will_use_local"] = has_local and not token_present
    report["dataset_repo"] = (
        "muhalwan/optimized_data_mhs" if token_present else None
    )
    report["local_path"] = local_file if has_local else None
    return report
if __name__ == "__main__":
    # Manual smoke check: print the availability report, then try to resolve
    # the data file and print the outcome.
    logging.basicConfig(level=logging.INFO)

    print("Data Information")
    for field, state in get_data_source_info().items():
        print(f" {field}: {state}")

    try:
        resolved_path = load_data_file()
        print(f"\nSuccess! Data file: {resolved_path}")
    except Exception as exc:
        # Any failure is printed rather than raised, so no traceback is shown.
        print(f"\nFailed: {exc}")