# NOTE: page-viewer residue captured together with this file; it is not code.
# Spaces: Sleeping
# File size: 1,993 Bytes
import logging
import os
from pathlib import Path
logger = logging.getLogger(__name__)


def load_data_file() -> str:
    """Return a filesystem path to the ``optimized_data.xlsx`` workbook.

    Sources are tried in this order:

    1. If the ``HF_TOKEN`` environment variable is set to a non-empty value,
       download ``optimized_data.xlsx`` from the
       ``muhalwan/optimized_data_mhs`` dataset repository, caching it under
       ``./hf_cache``.
    2. Otherwise, or if the download fails for any reason, use the local
       file ``data/optimized_data.xlsx`` (relative to the current working
       directory) when it exists.

    Returns:
        The path returned by ``hf_hub_download`` or the relative local path.

    Raises:
        FileNotFoundError: If neither source yields a file. When a download
            was attempted and failed, that failure is attached as the
            exception's ``__cause__``.
    """
    repo_id = "muhalwan/optimized_data_mhs"
    local_path = "data/optimized_data.xlsx"
    download_error = None

    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        try:
            # Imported lazily: a missing huggingface_hub package raises
            # ImportError here, which is handled like any other download
            # failure so the local file can still be used.
            from huggingface_hub import hf_hub_download

            logger.info("Dataset: %s", repo_id)
            file_path = hf_hub_download(
                repo_id=repo_id,
                filename="optimized_data.xlsx",
                repo_type="dataset",
                token=hf_token,
                cache_dir="./hf_cache",
            )
            logger.info("Data loaded successfully from HF dataset")
            return file_path
        except Exception as e:
            # Deliberately broad: any failure falls through to the local
            # file instead of aborting.
            logger.error(f"Failed to download from HF dataset: {e}")
            # `e` is unbound when the except block ends; keep a reference
            # so the final error can report why the download failed.
            download_error = e

    if Path(local_path).exists():
        logger.info(f"Loading data from local file: {local_path}")
        return local_path

    # `from None` (no download attempted) just raises without a cause.
    raise FileNotFoundError(
        "No data source available. Either set HF_TOKEN environment variable "
        "or place data file at 'data/optimized_data.xlsx'"
    ) from download_error
def get_data_source_info() -> dict:
    """Describe which data sources are currently available.

    Reads the ``HF_TOKEN`` environment variable and checks whether
    ``data/optimized_data.xlsx`` exists relative to the current working
    directory. Nothing is downloaded or opened.

    Returns:
        A dict with these keys:

        - ``hf_token_available``: ``HF_TOKEN`` is set to a non-empty value.
        - ``local_file_available``: the local workbook exists.
        - ``will_use_hf_dataset``: same as ``hf_token_available``.
        - ``will_use_local``: no token is set and the local file exists.
        - ``dataset_repo``: the dataset repository id when a token is set,
          otherwise ``None``.
        - ``local_path``: the local path when the file exists, otherwise
          ``None``.

        The ``will_use_*`` flags are derived only from the token and the
        file check; they do not account for a download failing at load time.
    """
    local_path = "data/optimized_data.xlsx"
    has_token = bool(os.getenv("HF_TOKEN"))
    local_exists = Path(local_path).exists()

    return {
        "hf_token_available": has_token,
        "local_file_available": local_exists,
        "will_use_hf_dataset": has_token,
        "will_use_local": (not has_token) and local_exists,
        "dataset_repo": "muhalwan/optimized_data_mhs" if has_token else None,
        "local_path": local_path if local_exists else None,
    }
if __name__ == "__main__":
    # Manual smoke check: print the data-source summary, then try to
    # resolve the data file and report the outcome.
    logging.basicConfig(level=logging.INFO)

    print("Data Information")
    info = get_data_source_info()
    for key, value in info.items():
        print(f" {key}: {value}")

    try:
        file_path = load_data_file()
    except Exception as e:
        # Script boundary: report the failure instead of a traceback.
        print(f"\nFailed: {e}")
    else:
        print(f"\nSuccess! Data file: {file_path}")