Spaces:

lynx-analytics
/

lynxkite

Runtime error

App Files Files Community

darabos committed on Jan 10

Commit

c53e4f2

1 Parent(s): c9d7995

Restore Cheminformatics example workspace.

Browse files

Files changed (7) hide show

.pre-commit-config.yaml +3 -2
examples/Cheminformatics/Example workspace.lynxkite.json +0 -0
examples/Cheminformatics/cheminfo_tools.py +18 -15
examples/Cheminformatics/draw_molecules.py +39 -0
examples/Cheminformatics/rcsb_api.py +0 -56
examples/Cheminformatics/requirements.txt +1 -2
uv.lock +1 -1

.pre-commit-config.yaml CHANGED Viewed

@@ -5,6 +5,7 @@ repos:
     - id: trailing-whitespace
     - id: end-of-file-fixer
     - id: check-yaml
 - repo: https://github.com/astral-sh/ruff-pre-commit
   rev: v0.9.10
   hooks:
@@ -26,7 +27,7 @@ repos:
   hooks:
     - id: deptry
       name: deptry for lynxkite-bio
-      entry: bash -c 'cd lynxkite-bio && deptry .'
     - id: deptry
       name: deptry for lynxkite-lynxscribe
-      entry: bash -c 'cd lynxkite-lynxscribe && deptry .'

     - id: trailing-whitespace
     - id: end-of-file-fixer
     - id: check-yaml
+    - id: check-added-large-files
 - repo: https://github.com/astral-sh/ruff-pre-commit
   rev: v0.9.10
   hooks:
   hooks:
     - id: deptry
       name: deptry for lynxkite-bio
+      entry: bash -c 'cd lynxkite-bio && uv run deptry .'
     - id: deptry
       name: deptry for lynxkite-lynxscribe
+      entry: bash -c 'cd lynxkite-lynxscribe && uv run deptry .'

examples/Cheminformatics/Example workspace.lynxkite.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

examples/Cheminformatics/cheminfo_tools.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import pickle
-from lynxkite_core.ops import op
 from matplotlib import pyplot as plt
 import pandas as pd
 from rdkit.Chem.Draw import rdMolDraw2D
@@ -20,8 +20,11 @@ from sklearn.model_selection import train_test_split
 import numpy as np
 from rdkit.Chem import MACCSkeys
-@op("LynxKite Graph Analytics", "View mol filter", view="matplotlib", slow=True)
 def mol_filter(
     bundle,
     *,
@@ -92,7 +95,7 @@ def mol_filter(
     # draw each filtered molecule
     images = []
-    for mol, legend, atom_ids, bond_ids in legends:
         drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
         opts = drawer.drawOptions()
         opts.legendFontSize = 200
@@ -106,7 +109,7 @@ def mol_filter(
     plot_gallery(images, num_cols=mols_per_row)
-@op("LynxKite Graph Analytics", "Lipinski filter", outputs=["pass", "fail"])
 def lipinski_filter(bundle, *, table_name: str, column_name: str, strict_lipinski: bool = True):
     """
     Apply the lipinski rule (MW <= 500,HBA <= 10 , HBD <=5,logP <= 5 ).
@@ -142,7 +145,7 @@ def lipinski_filter(bundle, *, table_name: str, column_name: str, strict_lipinsk
     }
-@op("LynxKite Graph Analytics", "Veber filter", outputs=["pass", "fail"])
 def veber_filter(bundle, *, table_name: str, column_name: str):
     """
     Apply the Veber rule (TPSA <= 140, rotatable bonds <= 10).
@@ -165,7 +168,7 @@ def veber_filter(bundle, *, table_name: str, column_name: str):
     }
-@op("LynxKite Graph Analytics", "Ghose filter", outputs=["pass", "fail"])
 def ghose_filter(bundle, *, table_name: str, column_name: str, strict_ghose: bool = True):
     """
     Apply the Ghose filter:
@@ -202,7 +205,7 @@ def ghose_filter(bundle, *, table_name: str, column_name: str, strict_ghose: boo
     }
-@op("LynxKite Graph Analytics", "Egan filter", outputs=["pass", "fail"])
 def egan_filter(bundle, *, table_name: str, column_name: str, strict_egan: bool = True):
     """
     Apply the Egan filter (logP <= 5.88, TPSA <= 131.6).
@@ -224,7 +227,7 @@ def egan_filter(bundle, *, table_name: str, column_name: str, strict_egan: bool
     }
-@op("LynxKite Graph Analytics", "Muegge filter", outputs=["pass", "fail"])
 def muegge_filter(bundle, *, table_name: str, column_name: str, strict_muegge: bool = True):
     """
     Apply the Muegge filter:
@@ -266,7 +269,7 @@ def muegge_filter(bundle, *, table_name: str, column_name: str, strict_muegge: b
     }
-@op("LynxKite Graph Analytics", "Brenk Aggregator filter", outputs=["pass", "fail"])
 def brenk_aggregator_filter(
     bundle, *, table_name: str, column_name: str, strict_brenk: bool = True
 ):
@@ -297,7 +300,7 @@ def brenk_aggregator_filter(
     }
-@op("LynxKite Graph Analytics", "View mol image", view="matplotlib", slow=True)
 def mol_image(bundle, *, table_name: str, smiles_column: str, mols_per_row: int):
     df = bundle.dfs[table_name].copy()
     df["mol"] = df[smiles_column].apply(Chem.MolFromSmiles)
@@ -321,7 +324,7 @@ def mol_image(bundle, *, table_name: str, smiles_column: str, mols_per_row: int)
     # --- draw each molecule into its own sub‐image and paste ---
     images = []
-    for mol, legend in zip(mols, legends):
         # draw one molecule
         drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
         opts = drawer.drawOptions()
@@ -347,7 +350,7 @@ def plot_gallery(images, num_cols):
     plt.tight_layout()
-@op("LynxKite Graph Analytics", "Train QSAR model")
 def build_qsar_model(
     bundle,
     *,
@@ -490,7 +493,7 @@ def predict_with_ci(model, X, confidence=0.95):
 # --- End of predict_with_ci definition ---
-@op("LynxKite Graph Analytics", "Train QSAR2")
 def build_qsar_model2(
     df: pd.DataFrame,
     *,
@@ -672,7 +675,7 @@ def build_qsar_model2(
     return results_df
-@op("LynxKite Graph Analytics", "Plot QSAR", view="matplotlib")
 def plot_qsar(results_df: pd.DataFrame):
     """
     Plots actual vs. predicted values from a QSAR results DataFrame.
@@ -862,7 +865,7 @@ def plot_qsar(results_df: pd.DataFrame):
             )
-@op("LynxKite Graph Analytics", "Plot QSAR2", view="matplotlib")
 def plot_qsar2(results_df: pd.DataFrame):
     """
     Plots actual vs. predicted values resembling the example image.

 import os
 import pickle
+from lynxkite_core.ops import op_registration
 from matplotlib import pyplot as plt
 import pandas as pd
 from rdkit.Chem.Draw import rdMolDraw2D
 import numpy as np
 from rdkit.Chem import MACCSkeys
+op = op_registration("LynxKite Graph Analytics", icon="microscope-filled")
+MAX_IMAGE_COUNT = 20
+@op("View mol filter", view="matplotlib", slow=True)
 def mol_filter(
     bundle,
     *,
     # draw each filtered molecule
     images = []
+    for mol, legend, atom_ids, bond_ids in legends[:MAX_IMAGE_COUNT]:
         drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
         opts = drawer.drawOptions()
         opts.legendFontSize = 200
     plot_gallery(images, num_cols=mols_per_row)
+@op("Lipinski filter", outputs=["pass", "fail"])
 def lipinski_filter(bundle, *, table_name: str, column_name: str, strict_lipinski: bool = True):
     """
     Apply the lipinski rule (MW <= 500,HBA <= 10 , HBD <=5,logP <= 5 ).
     }
+@op("Veber filter", outputs=["pass", "fail"])
 def veber_filter(bundle, *, table_name: str, column_name: str):
     """
     Apply the Veber rule (TPSA <= 140, rotatable bonds <= 10).
     }
+@op("Ghose filter", outputs=["pass", "fail"])
 def ghose_filter(bundle, *, table_name: str, column_name: str, strict_ghose: bool = True):
     """
     Apply the Ghose filter:
     }
+@op("Egan filter", outputs=["pass", "fail"])
 def egan_filter(bundle, *, table_name: str, column_name: str, strict_egan: bool = True):
     """
     Apply the Egan filter (logP <= 5.88, TPSA <= 131.6).
     }
+@op("Muegge filter", outputs=["pass", "fail"])
 def muegge_filter(bundle, *, table_name: str, column_name: str, strict_muegge: bool = True):
     """
     Apply the Muegge filter:
     }
+@op("Brenk Aggregator filter", outputs=["pass", "fail"])
 def brenk_aggregator_filter(
     bundle, *, table_name: str, column_name: str, strict_brenk: bool = True
 ):
     }
+@op("View mol image", view="matplotlib", slow=True)
 def mol_image(bundle, *, table_name: str, smiles_column: str, mols_per_row: int):
     df = bundle.dfs[table_name].copy()
     df["mol"] = df[smiles_column].apply(Chem.MolFromSmiles)
     # --- draw each molecule into its own sub‐image and paste ---
     images = []
+    for mol, legend in zip(mols, legends[:MAX_IMAGE_COUNT]):
         # draw one molecule
         drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
         opts = drawer.drawOptions()
     plt.tight_layout()
+@op("Train QSAR model")
 def build_qsar_model(
     bundle,
     *,
 # --- End of predict_with_ci definition ---
+@op("Train QSAR2")
 def build_qsar_model2(
     df: pd.DataFrame,
     *,
     return results_df
+@op("Plot QSAR", view="matplotlib")
 def plot_qsar(results_df: pd.DataFrame):
     """
     Plots actual vs. predicted values from a QSAR results DataFrame.
             )
+@op("Plot QSAR2", view="matplotlib")
 def plot_qsar2(results_df: pd.DataFrame):
     """
     Plots actual vs. predicted values resembling the example image.

examples/Cheminformatics/draw_molecules.py ADDED Viewed

	@@ -0,0 +1,39 @@

+from lynxkite_core.ops import op
+from lynxkite_graph_analytics.core import Bundle, TableName, ColumnNameByTableName
+import base64
+import io
+def pil_to_data(image):
+    """Encode an image as a WebP ``data:`` URI string.
+
+    ``image`` must provide a PIL-style ``save(fp, format=...)`` method. The
+    bytes it writes are base64-encoded and prefixed with the WebP data-URI
+    header.
+    """
+    with io.BytesIO() as sink:
+        image.save(sink, format="webp")
+        raw = sink.getvalue()
+    encoded = base64.b64encode(raw).decode("utf-8")
+    return f"data:image/webp;base64,{encoded}"
+def smiles_to_data(smiles):
+    """Render a SMILES string as a WebP data URI.
+
+    Returns None when ``smiles`` is not a string or when RDKit cannot parse
+    it into a molecule; otherwise returns the string produced by
+    ``pil_to_data`` for the rendered image.
+    """
+    # Missing cells in a table column typically arrive as None/NaN rather
+    # than str; skip them instead of handing them to the RDKit parser.
+    if not isinstance(smiles, str):
+        return None
+    # Import the submodules explicitly: a bare `import rdkit` does not load
+    # `rdkit.Chem` or `rdkit.Chem.Draw`, so attribute access on the package
+    # only works if some other module happened to import them first.
+    from rdkit import Chem
+    from rdkit.Chem import Draw
+
+    m = Chem.MolFromSmiles(smiles)
+    if m is None:
+        return None
+    return pil_to_data(Draw.MolToImage(m))
+@op("LynxKite Graph Analytics", "Draw molecules", icon="microscope-filled")
+def draw_molecules(
+    b: Bundle,
+    *,
+    table_name: TableName,
+    smiles_column: ColumnNameByTableName,
+    image_column: str = "image",
+):
+    """Adds molecule images to a table.
+
+    Every value in ``smiles_column`` of the table ``table_name`` is passed
+    through ``smiles_to_data`` and the result is stored in ``image_column``.
+    The work is done on copies of the bundle and of the table, and the
+    copied bundle is returned.
+    """
+    result = b.copy()
+    table = result.dfs[table_name].copy()
+    table[image_column] = table[smiles_column].apply(smiles_to_data)
+    result.dfs[table_name] = table
+    return result

examples/Cheminformatics/rcsb_api.py CHANGED Viewed

@@ -2,68 +2,12 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import pypdb
 import biotite.database.rcsb as rcsb
-from MDAnalysis.analysis import rms
-from opencadd.structure.superposition.engines.mda import MDAnalysisAligner
 from lynxkite_core.ops import op
 import os
 import numpy as np
 from Bio.PDB import PDBList, PDBParser, Superimposer
-def calc_rmsd(A, B):
-    """
-    Calculate RMSD between two structures.
-    Parameters
-    ----------
-    A : opencadd.structure.core.Structure
-        Structure A.
-    B : opencadd.structure.core.Structure
-        Structure B.
-    Returns
-    -------
-    float
-        RMSD value.
-    """
-    aligner = MDAnalysisAligner()
-    selection, _ = aligner.matching_selection(A, B)
-    A = A.select_atoms(selection["reference"])
-    B = B.select_atoms(selection["mobile"])
-    return rms.rmsd(A.positions, B.positions, superposition=False)
-def calc_rmsd_matrix(structures, names):
-    """
-    Calculate RMSD matrix between a list of structures.
-    Parameters
-    ----------
-    structures : list of opencadd.structure.core.Structure
-        List of structures.
-    names : list of str
-        List of structure names.
-    Returns
-    -------
-    pandas.DataFrame
-        RMSD matrix.
-    """
-    values = {name: {} for name in names}
-    for i, (A, name_i) in enumerate(zip(structures, names)):
-        for j, (B, name_j) in enumerate(zip(structures, names)):
-            if i == j:
-                values[name_i][name_j] = 0.0
-                continue
-            if i < j:
-                rmsd = calc_rmsd(A, B)
-                values[name_i][name_j] = rmsd
-                values[name_j][name_i] = rmsd
-                continue
-    df = pd.DataFrame.from_dict(values)
-    return df
 @op("LynxKite Graph Analytics", "PDB composite search")
 def get_pdb_count(
     *, ligand_id: str, experimental_method: str, max_resolution: float, polymer_count: int

 import matplotlib.pyplot as plt
 import pypdb
 import biotite.database.rcsb as rcsb
 from lynxkite_core.ops import op
 import os
 import numpy as np
 from Bio.PDB import PDBList, PDBParser, Superimposer
 @op("LynxKite Graph Analytics", "PDB composite search")
 def get_pdb_count(
     *, ligand_id: str, experimental_method: str, max_resolution: float, polymer_count: int

examples/Cheminformatics/requirements.txt CHANGED Viewed

@@ -1,10 +1,9 @@
 biotite
 pypdb
 rdkit
 MDAnalysis
 redo
-https://github.com/volkamerlab/opencadd/archive/master.tar.gz
 seaborn
-=======
 chembl_webresource_client
 rcsb-api

+biopython
 biotite
 pypdb
 rdkit
 MDAnalysis
 redo
 seaborn
 chembl_webresource_client
 rcsb-api

uv.lock CHANGED Viewed

@@ -2551,7 +2551,7 @@ requires-dist = [
     { name = "scanpy", specifier = ">=1.11.3" },
     { name = "scikit-learn", specifier = ">=1.7.1" },
     { name = "scipy", specifier = ">=1.15.2" },
-    { name = "torch", specifier = ">=2.7.0" },
 ]
 [[package]]

     { name = "scanpy", specifier = ">=1.11.3" },
     { name = "scikit-learn", specifier = ">=1.7.1" },
     { name = "scipy", specifier = ">=1.15.2" },
+    { name = "torch", specifier = ">=2.7.0", index = "https://pypi.org/simple" },
 ]
 [[package]]