ddi / src /preprocessing /load_ddinter.py
github-actions[bot]
Deploy from GitHub Actions (fb28c05c54cf19184fc3f14f1bf3297ba5749ea2)
d29b763
from pathlib import Path
from preprocessing.artifact_manager import manager
# Directory two levels above the folder containing this file
# (presumably the project root; confirm against the repository layout).
BASE_DIR = Path(__file__).resolve().parents[2]

# Parquet file that main() writes the loaded artifact to.
OUTPUT_PATH = BASE_DIR.joinpath('data', 'processed', 'ddinter_combined.parquet')
def main() -> None:
    """Load the ``ddinter_combined`` artifact and save it as Parquet.

    Fetches the artifact through the shared artifact ``manager``, prints its
    row/column counts and a ``head()`` preview, then writes it to
    ``OUTPUT_PATH``, creating the destination directory if it is missing.
    """
    # NOTE(review): the returned object is used like a pandas DataFrame
    # (.columns, .head(), .to_parquet()) -- confirm against artifact_manager.
    df = manager.load_artifact('ddinter_combined')

    # Quick visual sanity check of what was loaded.
    print(f'Loaded processed DDInter artifact: {len(df)} rows, {len(df.columns)} columns')
    print(df.head())

    # Ensure the target folder exists before writing; harmless if it already does.
    target_dir = OUTPUT_PATH.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    df.to_parquet(OUTPUT_PATH, index=False)
    print(f'Saved processed DDInter artifact to {OUTPUT_PATH}')
# Run the export only when this file is executed as a script, so importing
# the module has no side effects beyond defining its constants.
if __name__ == '__main__':
    main()