{
  "description": "DEBUG CONFIG - 300 samples total pour itération rapide",
  "sources": [
    {
      "source_type": "huggingface",
      "path": "wikitext",
      "config_name": "wikitext-103-v1",
      "text_column": "text",
      "weight": 1.0,
      "streaming": true,
      "split": "train",
      "max_samples": 300,
      "filters": {}
    }
  ],
  "preprocess_config": {
    "min_length": 50,
    "max_length": 512,
    "seq_length": 64,
    "remove_duplicates": true,
    "lowercase": false,
    "remove_urls": true,
    "remove_special_chars": false,
    "custom_filters": []
  }
}