| { | |
| "builder_name": "parquet", | |
| "citation": "", | |
| "config_name": "default", | |
| "dataset_name": "astro_paper_corpus", | |
| "dataset_size": 4128813829, | |
| "description": "", | |
| "download_checksums": { | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00000-of-00009.parquet": { | |
| "num_bytes": 240072323, | |
| "checksum": null | |
| }, | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00001-of-00009.parquet": { | |
| "num_bytes": 235851056, | |
| "checksum": null | |
| }, | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00002-of-00009.parquet": { | |
| "num_bytes": 236413937, | |
| "checksum": null | |
| }, | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00003-of-00009.parquet": { | |
| "num_bytes": 237728419, | |
| "checksum": null | |
| }, | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00004-of-00009.parquet": { | |
| "num_bytes": 236710419, | |
| "checksum": null | |
| }, | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00005-of-00009.parquet": { | |
| "num_bytes": 239567004, | |
| "checksum": null | |
| }, | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00006-of-00009.parquet": { | |
| "num_bytes": 234863979, | |
| "checksum": null | |
| }, | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00007-of-00009.parquet": { | |
| "num_bytes": 232662046, | |
| "checksum": null | |
| }, | |
| "hf://datasets/JSALT2024-Astro-LLMs/astro_paper_corpus@b957a28700badb3b5f5c7af06ea77a2560ab6e46/data/train-00008-of-00009.parquet": { | |
| "num_bytes": 237444927, | |
| "checksum": null | |
| } | |
| }, | |
| "download_size": 2131314110, | |
| "features": { | |
| "id": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "author": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "bibcode": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "title": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "citation_count": { | |
| "dtype": "int64", | |
| "_type": "Value" | |
| }, | |
| "aff": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "citation": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "database": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "read_count": { | |
| "dtype": "int64", | |
| "_type": "Value" | |
| }, | |
| "keyword": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "reference": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "doi": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "subfolder": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "filename": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "introduction": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "conclusions": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "year": { | |
| "dtype": "int64", | |
| "_type": "Value" | |
| }, | |
| "month": { | |
| "dtype": "int64", | |
| "_type": "Value" | |
| }, | |
| "arxiv_id": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "abstract": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "failed_ids": { | |
| "dtype": "bool", | |
| "_type": "Value" | |
| }, | |
| "keyword_search": { | |
| "feature": { | |
| "dtype": "string", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "umap_x": { | |
| "dtype": "float32", | |
| "_type": "Value" | |
| }, | |
| "umap_y": { | |
| "dtype": "float32", | |
| "_type": "Value" | |
| }, | |
| "clust_id": { | |
| "dtype": "int64", | |
| "_type": "Value" | |
| } | |
| }, | |
| "homepage": "", | |
| "license": "", | |
| "size_in_bytes": 6260127939, | |
| "splits": { | |
| "train": { | |
| "name": "train", | |
| "num_bytes": 4128813829, | |
| "num_examples": 271544, | |
| "shard_lengths": [ | |
| 33172, | |
| 33172, | |
| 33172, | |
| 33172, | |
| 33172, | |
| 33171, | |
| 34171, | |
| 34171, | |
| 4171 | |
| ], | |
| "dataset_name": "astro_paper_corpus" | |
| } | |
| }, | |
| "version": { | |
| "version_str": "0.0.0", | |
| "major": 0, | |
| "minor": 0, | |
| "patch": 0 | |
| } | |
| } |