Spaces:
Running
Running
| import logging | |
| import os | |
| import pathlib | |
| import tempfile | |
| from typing import List, Optional | |
| import gradio as gr | |
| import pandas as pd | |
| from rdkit import Chem | |
| from tqdm import tqdm | |
| from configuration import GENE_EXPRESSION_METADATA | |
| from submission import submission | |
# Module-level logger; the NullHandler keeps the module silent unless the
# hosting application configures logging itself.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Short display labels for the longest tissue-site names. Sites that are not
# listed here are shown unchanged (lookups fall back to the original value).
site_mapper = {
    "central_nervous_system": "CNS",
    "haematopoietic_and_lymphoid_tissue": "Haema_lymph",
    "upper_aerodigestive_tract": "digestive",
    "autonomic_ganglia": "ganglia",
}
def _uploaded_path(upload):
    """Return the filesystem path behind an uploaded file, or ``None``.

    Accepts either an object exposing its path as ``.name`` or a plain path
    string, so both styles of file input are handled the same way.
    """
    if upload is None:
        return None
    return getattr(upload, "name", upload)


def run_inference(
    smiles: Optional[str],
    smiles_path: Optional[str],
    omic: Optional[str],
    confidence: bool,
):
    """Predict IC50 values for one or more molecules and save them as CSV.

    Args:
        smiles: A single SMILES string. ``None``, empty or whitespace-only
            values count as "not provided".
        smiles_path: Uploaded tab-separated file without header whose first
            column holds one SMILES per row, or ``None``.
        omic: Uploaded omics file forwarded to ``submission`` as
            ``omics_file``, or ``None``.
        confidence: Whether to request confidence estimates and add them to
            the result.

    Returns:
        A tuple ``(csv_path, preview)``: the path of the CSV file holding the
        full result table and a DataFrame with its first 25 rows.

    Raises:
        TypeError: If neither or both of ``smiles`` and ``smiles_path`` are
            given.
        ValueError: If RDKit cannot parse one of the SMILES.
    """
    # Read SMILES: exactly one of the two sources must be provided.
    has_single = smiles is not None and smiles.strip() != ""
    has_file = smiles_path is not None
    if not has_single and not has_file:
        raise TypeError("Pass either single SMILES or a file")
    if has_single and has_file:
        raise TypeError("Pass either single SMILES or a file, not both")

    if has_single:
        smiles_list: List[str] = [smiles.strip()]
    else:
        smiles_data = pd.read_csv(_uploaded_path(smiles_path), sep="\t", header=None)
        smiles_list = smiles_data[0].tolist()

    # Validate everything up front so no prediction runs on a bad batch.
    for smi in smiles_list:
        if Chem.MolFromSmiles(smi) is None:
            raise ValueError(f"Found invalid SMILES {smi}")

    # Read omics; with ``None`` the submission is called without an omics file.
    omic_path = _uploaded_path(omic)

    predicted_df = pd.DataFrame({})
    for smi in tqdm(smiles_list, total=len(smiles_list)):
        output = submission(
            drug={"smiles": smi},
            workspace_id="emulated_workspace_id",
            task_id="emulated_task_id",
            estimate_confidence=confidence,
            omics_file=omic_path,
        )
        # For the moment no attention analysis: only the IC50 prediction and
        # (optionally) the confidence estimates are read from the output.
        predicted_df[f"IC50_{smi}"] = output["log_micromolar_IC50"].squeeze().round(3)
        if confidence:
            predicted_df[f"aleatoric_confidence_{smi}"] = (
                output["aleatoric_confidence"].squeeze().round(3)
            )
            # NOTE(review): assumes ``submission`` returns an
            # "epistemic_confidence" entry next to "aleatoric_confidence";
            # previously the aleatoric values were stored under this column.
            predicted_df[f"epistemic_confidence_{smi}"] = (
                output["epistemic_confidence"].squeeze().round(3)
            )

    # Prepare DF to visualize
    if omic_path is None:
        # Baseline run: put the cell-line metadata next to the predictions.
        df = GENE_EXPRESSION_METADATA.copy()
        df.drop(
            [
                "histology",
                "cell_line_name",
                "IC50 (min/max scaled)",
                "IC50 (log(μmol))",
            ],
            axis=1,
            inplace=True,
        )
        df["site"] = df["site"].apply(lambda x: site_mapper.get(x, x))
        df["cell_line"] = df["cell_line"].apply(lambda x: x.split("_")[0])
        if not confidence:
            # Hide confidence columns shipped with the metadata; tolerate
            # either of them being absent.
            df = df.drop(
                columns=["aleatoric_confidence", "epistemic_confidence"],
                errors="ignore",
            )
        result_df = pd.concat(
            [df["cell_line"], predicted_df, df.drop(["cell_line"], axis=1)], axis=1
        )
    else:
        result_df = predicted_df

    # Save to temporary dir
    temp_path = os.path.join(tempfile.gettempdir(), "paccmann_result.csv")
    result_df.to_csv(temp_path)
    return temp_path, result_df.head(25)
if __name__ == "__main__":
    # Static assets (article and description texts) live next to this script.
    metadata_root = pathlib.Path(__file__).parent / "model_cards"

    # Each example row follows the input order:
    # SMILES, SMILES file, omics file, estimate confidence.
    examples = [
        ["COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O", None, None, False],
        ["COC1=C(C=C2C(=C1)N=CN=C2NC3=CC(=C(C=C3)F)Cl)OCCCN4CCOCC4", None, None, True],
        # [None, metadata_root.joinpath("molecules.smi"), None, False],
    ]

    article = (metadata_root / "article.md").read_text()
    description = (metadata_root / "description.md").read_text()

    # Input widgets, in the positional order expected by run_inference.
    smiles_box = gr.Textbox(
        label="SMILES",
        placeholder="COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O",
        lines=1,
    )
    smiles_upload = gr.File(
        file_types=[".smi", ".tsv"],
        label="Multiple SMILES",
    )
    omics_upload = gr.File(
        file_types=[".csv"],
        label="Transcriptomics data file",
    )
    confidence_toggle = gr.Radio(
        choices=[True, False], label="Estimate confidence", value=False
    )

    # Output widgets: the downloadable CSV and a preview table.
    result_file = gr.File(label="Download full results")
    result_preview = gr.DataFrame(label="Preview of results for 25 cell lines")

    demo = gr.Interface(
        fn=run_inference,
        title="PaccMann",
        inputs=[smiles_box, smiles_upload, omics_upload, confidence_toggle],
        outputs=[result_file, result_preview],
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)