{ "name": "text8", "description": "Text8 dataset - the first 100 MB (10^8 bytes) of cleaned English Wikipedia text, a large corpus commonly used for training word embeddings", "source": "http://mattmahoney.net/dc/text8.zip", "license": "Public domain", "format": "text", "usage": "Use for training word embeddings and language models" }