Fix dead split parameter in PackedStreamingDataset._load_dataset
Browse files
Use self.split instead of self.config.dataset_split so the split
argument passed to the constructor is actually forwarded to load_dataset.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- llm_lab/data/dataset.py +1 -1
llm_lab/data/dataset.py
CHANGED
|
@@ -61,7 +61,7 @@ class PackedStreamingDataset(IterableDataset):
|
|
| 61 |
ds = load_dataset(
|
| 62 |
self.config.dataset_name,
|
| 63 |
name=self.config.dataset_subset,
|
| 64 |
-
split=self.config.dataset_split,
|
| 65 |
streaming=True, # Key: streaming mode
|
| 66 |
trust_remote_code=True,
|
| 67 |
)
|
|
|
|
| 61 |
ds = load_dataset(
|
| 62 |
self.config.dataset_name,
|
| 63 |
name=self.config.dataset_subset,
|
| 64 |
+
split=self.split,
|
| 65 |
streaming=True, # Key: streaming mode
|
| 66 |
trust_remote_code=True,
|
| 67 |
)
|