# Dataset card for "Data_Wikipedia_En_1M".
# One key per line, 2-space indentation, so the file parses as a block mapping.

# Identifier of this dataset entry.
name: "Data_Wikipedia_En_1M"

# Where the data lives; only a `local` location is defined here.
# NOTE(review): the path is relative — presumably resolved against the
# working directory of the consuming process; confirm with the loader.
parquet_path:
  local: "./_LexaLCM_Pre0/Datasets/Wikipedia_En_1M"

# Column names referenced by this card.
# NOTE(review): placed at top level (siblings of `parquet_path`) — confirm
# against the consumer's schema if it expects them nested elsewhere.
# Presumably the embedding column and the matching raw-text column; verify.
source_column: "text_sentences_sonar_emb"
source_text_column: "text_sentences"