"""Filter per-plate h5ad files and write a reduced copy of each plate.

For every plate the script keeps only the rows whose ``pass_filter`` is
``"full"`` and whose ``drugname_drugconc`` matches the value looked up for
the row's ``drug`` in ``concentrations.csv``, then writes the result to
``../Data/h5ad/reduced/``. Plates are processed in parallel with joblib.
"""
import gc
import traceback
from typing import Optional

import polars as pl
import scanpy as sc
from joblib import Parallel, delayed

# Plates are numbered 1..N_PLATES inclusive.
N_PLATES = 14
# Number of joblib workers processing plates concurrently.
N_JOBS = 4

concentrations = pl.read_csv("concentrations.csv")

# Maps the first CSV column to the second one.
# NOTE(review): presumably drug name -> expected "drugname_drugconc" value;
# confirm against the CSV header.
drug_to_concentration = {
    row[0]: row[1] for row in concentrations.iter_rows()
}


def reduce(plate: int) -> Optional[int]:
    """Filter one plate and write the reduced h5ad file.

    Args:
        plate: Plate number used to build the input and output file names.

    Returns:
        The number of rows (observations) kept after filtering, or ``None``
        if anything went wrong while loading, filtering or writing. Failures
        are reported on stdout and never raised, so one bad plate does not
        abort the whole parallel run.
    """
    adata = None
    subset = None
    try:
        print(f"Plate: {plate}")
        # backed="r" keeps the matrix on disk instead of loading it in memory.
        adata = sc.read_h5ad(
            f"../Data/h5ad/h5ad/plate{plate}_filt_Vevo_Tahoe100M_WServicesFrom_ParseGigalab.h5ad",
            backed="r",
        )
        print(f"Loaded: {plate}")

        obs = adata.obs
        passed = obs["pass_filter"] == "full"
        # A drug missing from the lookup raises KeyError, which is reported
        # by the handler below and fails this plate only.
        expected = obs["drug"].map(lambda x: drug_to_concentration[x]).astype(str)
        matches = obs["drugname_drugconc"].astype(str) == expected
        subset = adata[passed & matches]
        print(f"Filtered: {plate}")

        subset.write_h5ad(f"../Data/h5ad/reduced/plate{plate}.h5ad")
        print(f"Wrote: {plate}")

        # n_obs counts the rows that survived the filter; n_vars would be the
        # number of variables, which a row filter never changes.
        return subset.n_obs
    except Exception:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(f"ERROR loading {plate}")
        # Include the actual cause; the banner alone is not actionable.
        print(traceback.format_exc())
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        return None
    finally:
        # Release the backing file and drop references before collecting so
        # a long-lived worker does not accumulate open handles or memory.
        if adata is not None:
            adata.file.close()
        adata = None
        subset = None
        gc.collect()


results = Parallel(n_jobs=N_JOBS)(
    delayed(reduce)(i) for i in range(1, N_PLATES + 1)
)
print(results)