Update processor.py
Browse files- processor.py +3 -0
processor.py
CHANGED
|
@@ -334,8 +334,10 @@ The following operations were applied to the source data:
|
|
| 334 |
ds_stream = load_dataset(source_id, name=conf, split=split, streaming=True, token=self.token)
|
| 335 |
count = 0
|
| 336 |
for i, row in enumerate(ds_stream):
|
|
|
|
| 337 |
if max_rows and count >= int(max_rows):
|
| 338 |
break
|
|
|
|
| 339 |
|
| 340 |
# 1. Filter
|
| 341 |
if recipe.get('filter_rule'):
|
|
@@ -389,6 +391,7 @@ The following operations were applied to the source data:
|
|
| 389 |
|
| 390 |
for i, row in enumerate(ds_stream):
|
| 391 |
if len(processed) >= 5: break
|
|
|
|
| 392 |
|
| 393 |
# Check Filter
|
| 394 |
passed = True
|
|
|
|
| 334 |
ds_stream = load_dataset(source_id, name=conf, split=split, streaming=True, token=self.token)
|
| 335 |
count = 0
|
| 336 |
for i, row in enumerate(ds_stream):
|
| 337 |
+
|
| 338 |
if max_rows and count >= int(max_rows):
|
| 339 |
break
|
| 340 |
+
row = dict(row)
|
| 341 |
|
| 342 |
# 1. Filter
|
| 343 |
if recipe.get('filter_rule'):
|
|
|
|
| 391 |
|
| 392 |
for i, row in enumerate(ds_stream):
|
| 393 |
if len(processed) >= 5: break
|
| 394 |
+
row = dict(row)
|
| 395 |
|
| 396 |
# Check Filter
|
| 397 |
passed = True
|