Spaces:
Sleeping
Sleeping
backup
Browse files
app.py
CHANGED
|
@@ -32,8 +32,12 @@ dataset_post_ids = list(
|
|
| 32 |
photoexp = pd.read_csv("./photoexp_filtered.csv")
|
| 33 |
valid_post_ids = set(photoexp.post_id.tolist())
|
| 34 |
|
| 35 |
-
# filter RESULTS_BACKUP_REPO to include only valid_post_ids
|
| 36 |
-
dataset = dataset.filter(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
# Download existing data from hub
|
|
|
|
| 32 |
photoexp = pd.read_csv("./photoexp_filtered.csv")
|
| 33 |
valid_post_ids = set(photoexp.post_id.tolist())
|
| 34 |
|
| 35 |
+
# filter RESULTS_BACKUP_REPO to include only valid_post_ids using batched processing
|
| 36 |
+
dataset = dataset.filter(
|
| 37 |
+
lambda xs: [x in valid_post_ids for x in xs["post_id"]],
|
| 38 |
+
batched=True,
|
| 39 |
+
batch_size=256,
|
| 40 |
+
)
|
| 41 |
|
| 42 |
|
| 43 |
# Download existing data from hub
|