pacific-prime / debug_config.json
Pacific-Prime's picture
Upload folder using huggingface_hub
6a7d89a verified
raw
history blame contribute delete
621 Bytes
{
"description": "DEBUG CONFIG - 300 samples total pour itération rapide",
"sources": [
{
"source_type": "huggingface",
"path": "wikitext",
"config_name": "wikitext-103-v1",
"text_column": "text",
"weight": 1.0,
"streaming": true,
"split": "train",
"max_samples": 300,
"filters": {}
}
],
"preprocess_config": {
"min_length": 50,
"max_length": 512,
"seq_length": 64,
"remove_duplicates": true,
"lowercase": false,
"remove_urls": true,
"remove_special_chars": false,
"custom_filters": []
}
}