taesiri committed on
Commit
0ebeb4a
·
1 Parent(s): 4d70418
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -32,8 +32,12 @@ dataset_post_ids = list(
32
  photoexp = pd.read_csv("./photoexp_filtered.csv")
33
  valid_post_ids = set(photoexp.post_id.tolist())
34
 
35
- # filter RESULTS_BACKUP_REPO to include only valid_post_ids
36
- dataset = dataset.filter(lambda x: x["post_id"] in valid_post_ids)
 
 
 
 
37
 
38
 
39
  # Download existing data from hub
 
32
  photoexp = pd.read_csv("./photoexp_filtered.csv")
33
  valid_post_ids = set(photoexp.post_id.tolist())
34
 
35
+ # filter RESULTS_BACKUP_REPO to include only valid_post_ids using batched processing
36
+ dataset = dataset.filter(
37
+ lambda xs: [x in valid_post_ids for x in xs["post_id"]],
38
+ batched=True,
39
+ batch_size=256,
40
+ )
41
 
42
 
43
  # Download existing data from hub