"""Dataset implementations and loaders.""" # HuggingFace-based datasets are optional for JSONL-only deployments. try: from .hf_base import BaseHFDataset from .hf_pretrain import PretrainDataset from .hf_sft import SFTDataset from .hf_rl import RLDataset except ImportError: BaseHFDataset = None PretrainDataset = None SFTDataset = None RLDataset = None # JSONL-based datasets (async-only) from .jsonl_base import BaseJSONLDataset from .pretrain_jsonl import PretrainJSONLDataset from .sft_jsonl import SFTJSONLDataset from .rl_jsonl import RLJSONLDataset # Utilities from .tokenizer import SentencePieceTokenizerWrapper from .sft_utils import ( parse_sft_record, build_sft_sequence_tokens, apply_response_masking, build_response_only_next_token_labels, ) from .loaders import get_dataloader from .async_loader import AsyncBatchIterator from .tokenization_queue import TokenizationQueue from .factory import DatasetFactory __all__ = [ # HuggingFace datasets "BaseHFDataset", "PretrainDataset", "SFTDataset", "RLDataset", # JSONL datasets "BaseJSONLDataset", "PretrainJSONLDataset", "SFTJSONLDataset", "RLJSONLDataset", # Utilities "SentencePieceTokenizerWrapper", "parse_sft_record", "build_sft_sequence_tokens", "apply_response_masking", "build_response_only_next_token_labels", # Data loading "get_dataloader", "AsyncBatchIterator", "TokenizationQueue", "DatasetFactory", ]