{ "description": "DEBUG CONFIG - 300 samples max pour itération rapide", "sources": [ { "source_type": "huggingface", "path": "wikitext", "config_name": "wikitext-103-v1", "text_column": "text", "weight": 1.0, "streaming": true, "split": "train", "max_samples": 300, "filters": {} } ], "preprocess_config": { "min_length": 50, "max_length": 512, "seq_length": 64, "remove_duplicates": true, "lowercase": false, "remove_urls": true, "remove_special_chars": false, "custom_filters": [] } }