from data.registry import DatasetRegistry
from data.loaders.language import LanguageTextLoader

# Build a registry and register a single language-text loader with it.
# NOTE(review): "data/raw/wikipedia" is presumably the directory the loader
# reads from, resolved relative to the working directory — confirm against
# LanguageTextLoader.
registry = DatasetRegistry()
registry.register(LanguageTextLoader("data/raw/wikipedia"))

# Pull the "text" entry out of every sample the registry yields, in the
# order all_samples() produces them. A sample without a "text" key raises
# KeyError here rather than being skipped.
texts = [s["text"] for s in registry.all_samples()]