processing / utils / __init__.py
LiamKhoaLe's picture
Upd 14/9
80cb919
raw
history blame
740 Bytes
"""
Utility package for the Medical Dataset Augmenter Space.
This package provides:
- drive_saver: Google Drive upload helper
- llm: API key rotation, paraphraser, translation/backtranslation
- datasets: Hugging Face dataset resolver & downloader
- processor: dataset-specific processing pipeline with augmentation
- schema: centralised SFT writer (JSONL + CSV)
- token: GCS project token refresher and authenticator
- augment: low-level augmentation utilities (text cleanup, deid, paraphrase hooks)
"""
from . import drive_saver
from . import llm
from . import datasets
from . import processor
from . import schema
from . import augment
from . import token
__all__ = ["drive_saver", "llm", "datasets", "processor", "schema", "augment"]