File size: 2,003 Bytes
1aa7fae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""
Data registration script for the predictive maintenance project.

This script is the analogue of the notebook's `data_register.py`:
- It ensures the Hugging Face dataset repo exists.
- It uploads the raw engine dataset from the local `data/` folder
  into the dataset repo so it can be consumed by other stages
  (EDA, data preparation, model training) using a consistent source.
"""

from __future__ import annotations

import sys
from pathlib import Path

# Project-local modules. They are imported inside a guard so that a broken
# module search path produces a readable diagnostic instead of a bare traceback.
try:
    import config
    from hf_data_utils import register_raw_engine_data_to_hf
except ImportError as import_error:
    # Report what failed and where Python looked, then abort with a
    # non-zero status.
    print(f"ERROR: Failed to import modules: {import_error}", file=sys.stderr)
    print(f"Python path: {sys.path}", file=sys.stderr)
    sys.exit(1)


def main() -> None:
    """Check the local setup and register the raw engine data on Hugging Face.

    The function prints the relevant ``config`` values, verifies that the raw
    data file exists and that ``config.HF_TOKEN`` is truthy, and then calls
    ``register_raw_engine_data_to_hf()``.

    Any ``Exception`` raised along the way (including the validation errors
    raised here) is reported on stderr together with its traceback, after
    which the process exits with status 1.
    """
    try:
        # Diagnostics first: values are read and printed in a fixed order.
        print(f"PROJECT_ROOT: {config.PROJECT_ROOT}")

        raw_file = config.RAW_DATA_FILE
        print(f"RAW_DATA_FILE: {raw_file}")
        print(f"RAW_DATA_FILE exists: {raw_file.exists()}")

        dataset_repo = config.HF_DATASET_REPO
        print(f"HF_DATASET_REPO: {dataset_repo}")

        token = config.HF_TOKEN
        print(f"HF_TOKEN is set: {bool(token)}")

        # Guard: the raw CSV must be present locally.
        if not raw_file.exists():
            missing_file_msg = (
                f"Expected raw data at {raw_file}, "
                "but the file does not exist. Make sure `engine_data.csv` "
                "is placed in the `data/` folder."
            )
            raise FileNotFoundError(missing_file_msg)

        # Guard: a token must be configured.
        if not token:
            missing_token_msg = (
                "HF_TOKEN is not set. Please set it as an environment variable "
                "or in GitHub Secrets."
            )
            raise ValueError(missing_token_msg)

        print(f"Registering raw engine dataset from: {raw_file}")
        print(f"Target HF dataset repo: {dataset_repo}")
        register_raw_engine_data_to_hf()
        print("✅ Dataset registration to Hugging Face completed.")
    except Exception as exc:
        # Top-level boundary of the script: report, dump the traceback, and
        # signal failure through the exit status.
        print(f"ERROR: {type(exc).__name__}: {exc}", file=sys.stderr)
        from traceback import print_exc

        print_exc()
        sys.exit(1)


# Run the registration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()