darabos committed on
Commit
c53e4f2
·
1 Parent(s): c9d7995

Restore Cheminformatics example workspace.

Browse files
.pre-commit-config.yaml CHANGED
@@ -5,6 +5,7 @@ repos:
5
  - id: trailing-whitespace
6
  - id: end-of-file-fixer
7
  - id: check-yaml
 
8
  - repo: https://github.com/astral-sh/ruff-pre-commit
9
  rev: v0.9.10
10
  hooks:
@@ -26,7 +27,7 @@ repos:
26
  hooks:
27
  - id: deptry
28
  name: deptry for lynxkite-bio
29
- entry: bash -c 'cd lynxkite-bio && deptry .'
30
  - id: deptry
31
  name: deptry for lynxkite-lynxscribe
32
- entry: bash -c 'cd lynxkite-lynxscribe && deptry .'
 
5
  - id: trailing-whitespace
6
  - id: end-of-file-fixer
7
  - id: check-yaml
8
+ - id: check-added-large-files
9
  - repo: https://github.com/astral-sh/ruff-pre-commit
10
  rev: v0.9.10
11
  hooks:
 
27
  hooks:
28
  - id: deptry
29
  name: deptry for lynxkite-bio
30
+ entry: bash -c 'cd lynxkite-bio && uv run deptry .'
31
  - id: deptry
32
  name: deptry for lynxkite-lynxscribe
33
+ entry: bash -c 'cd lynxkite-lynxscribe && uv run deptry .'
examples/Cheminformatics/Example workspace.lynxkite.json CHANGED
The diff for this file is too large to render. See raw diff
 
examples/Cheminformatics/cheminfo_tools.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import pickle
3
- from lynxkite_core.ops import op
4
  from matplotlib import pyplot as plt
5
  import pandas as pd
6
  from rdkit.Chem.Draw import rdMolDraw2D
@@ -20,8 +20,11 @@ from sklearn.model_selection import train_test_split
20
  import numpy as np
21
  from rdkit.Chem import MACCSkeys
22
 
 
 
23
 
24
- @op("LynxKite Graph Analytics", "View mol filter", view="matplotlib", slow=True)
 
25
  def mol_filter(
26
  bundle,
27
  *,
@@ -92,7 +95,7 @@ def mol_filter(
92
 
93
  # draw each filtered molecule
94
  images = []
95
- for mol, legend, atom_ids, bond_ids in legends:
96
  drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
97
  opts = drawer.drawOptions()
98
  opts.legendFontSize = 200
@@ -106,7 +109,7 @@ def mol_filter(
106
  plot_gallery(images, num_cols=mols_per_row)
107
 
108
 
109
- @op("LynxKite Graph Analytics", "Lipinski filter", outputs=["pass", "fail"])
110
  def lipinski_filter(bundle, *, table_name: str, column_name: str, strict_lipinski: bool = True):
111
  """
112
  Apply the lipinski rule (MW <= 500,HBA <= 10 , HBD <=5,logP <= 5 ).
@@ -142,7 +145,7 @@ def lipinski_filter(bundle, *, table_name: str, column_name: str, strict_lipinsk
142
  }
143
 
144
 
145
- @op("LynxKite Graph Analytics", "Veber filter", outputs=["pass", "fail"])
146
  def veber_filter(bundle, *, table_name: str, column_name: str):
147
  """
148
  Apply the Veber rule (TPSA <= 140, rotatable bonds <= 10).
@@ -165,7 +168,7 @@ def veber_filter(bundle, *, table_name: str, column_name: str):
165
  }
166
 
167
 
168
- @op("LynxKite Graph Analytics", "Ghose filter", outputs=["pass", "fail"])
169
  def ghose_filter(bundle, *, table_name: str, column_name: str, strict_ghose: bool = True):
170
  """
171
  Apply the Ghose filter:
@@ -202,7 +205,7 @@ def ghose_filter(bundle, *, table_name: str, column_name: str, strict_ghose: boo
202
  }
203
 
204
 
205
- @op("LynxKite Graph Analytics", "Egan filter", outputs=["pass", "fail"])
206
  def egan_filter(bundle, *, table_name: str, column_name: str, strict_egan: bool = True):
207
  """
208
  Apply the Egan filter (logP <= 5.88, TPSA <= 131.6).
@@ -224,7 +227,7 @@ def egan_filter(bundle, *, table_name: str, column_name: str, strict_egan: bool
224
  }
225
 
226
 
227
- @op("LynxKite Graph Analytics", "Muegge filter", outputs=["pass", "fail"])
228
  def muegge_filter(bundle, *, table_name: str, column_name: str, strict_muegge: bool = True):
229
  """
230
  Apply the Muegge filter:
@@ -266,7 +269,7 @@ def muegge_filter(bundle, *, table_name: str, column_name: str, strict_muegge: b
266
  }
267
 
268
 
269
- @op("LynxKite Graph Analytics", "Brenk Aggregator filter", outputs=["pass", "fail"])
270
  def brenk_aggregator_filter(
271
  bundle, *, table_name: str, column_name: str, strict_brenk: bool = True
272
  ):
@@ -297,7 +300,7 @@ def brenk_aggregator_filter(
297
  }
298
 
299
 
300
- @op("LynxKite Graph Analytics", "View mol image", view="matplotlib", slow=True)
301
  def mol_image(bundle, *, table_name: str, smiles_column: str, mols_per_row: int):
302
  df = bundle.dfs[table_name].copy()
303
  df["mol"] = df[smiles_column].apply(Chem.MolFromSmiles)
@@ -321,7 +324,7 @@ def mol_image(bundle, *, table_name: str, smiles_column: str, mols_per_row: int)
321
 
322
  # --- draw each molecule into its own sub‐image and paste ---
323
  images = []
324
- for mol, legend in zip(mols, legends):
325
  # draw one molecule
326
  drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
327
  opts = drawer.drawOptions()
@@ -347,7 +350,7 @@ def plot_gallery(images, num_cols):
347
  plt.tight_layout()
348
 
349
 
350
- @op("LynxKite Graph Analytics", "Train QSAR model")
351
  def build_qsar_model(
352
  bundle,
353
  *,
@@ -490,7 +493,7 @@ def predict_with_ci(model, X, confidence=0.95):
490
  # --- End of predict_with_ci definition ---
491
 
492
 
493
- @op("LynxKite Graph Analytics", "Train QSAR2")
494
  def build_qsar_model2(
495
  df: pd.DataFrame,
496
  *,
@@ -672,7 +675,7 @@ def build_qsar_model2(
672
  return results_df
673
 
674
 
675
- @op("LynxKite Graph Analytics", "Plot QSAR", view="matplotlib")
676
  def plot_qsar(results_df: pd.DataFrame):
677
  """
678
  Plots actual vs. predicted values from a QSAR results DataFrame.
@@ -862,7 +865,7 @@ def plot_qsar(results_df: pd.DataFrame):
862
  )
863
 
864
 
865
- @op("LynxKite Graph Analytics", "Plot QSAR2", view="matplotlib")
866
  def plot_qsar2(results_df: pd.DataFrame):
867
  """
868
  Plots actual vs. predicted values resembling the example image.
 
1
  import os
2
  import pickle
3
+ from lynxkite_core.ops import op_registration
4
  from matplotlib import pyplot as plt
5
  import pandas as pd
6
  from rdkit.Chem.Draw import rdMolDraw2D
 
20
  import numpy as np
21
  from rdkit.Chem import MACCSkeys
22
 
23
+ op = op_registration("LynxKite Graph Analytics", icon="microscope-filled")
24
+ MAX_IMAGE_COUNT = 20
25
 
26
+
27
+ @op("View mol filter", view="matplotlib", slow=True)
28
  def mol_filter(
29
  bundle,
30
  *,
 
95
 
96
  # draw each filtered molecule
97
  images = []
98
+ for mol, legend, atom_ids, bond_ids in legends[:MAX_IMAGE_COUNT]:
99
  drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
100
  opts = drawer.drawOptions()
101
  opts.legendFontSize = 200
 
109
  plot_gallery(images, num_cols=mols_per_row)
110
 
111
 
112
+ @op("Lipinski filter", outputs=["pass", "fail"])
113
  def lipinski_filter(bundle, *, table_name: str, column_name: str, strict_lipinski: bool = True):
114
  """
115
  Apply the lipinski rule (MW <= 500,HBA <= 10 , HBD <=5,logP <= 5 ).
 
145
  }
146
 
147
 
148
+ @op("Veber filter", outputs=["pass", "fail"])
149
  def veber_filter(bundle, *, table_name: str, column_name: str):
150
  """
151
  Apply the Veber rule (TPSA <= 140, rotatable bonds <= 10).
 
168
  }
169
 
170
 
171
+ @op("Ghose filter", outputs=["pass", "fail"])
172
  def ghose_filter(bundle, *, table_name: str, column_name: str, strict_ghose: bool = True):
173
  """
174
  Apply the Ghose filter:
 
205
  }
206
 
207
 
208
+ @op("Egan filter", outputs=["pass", "fail"])
209
  def egan_filter(bundle, *, table_name: str, column_name: str, strict_egan: bool = True):
210
  """
211
  Apply the Egan filter (logP <= 5.88, TPSA <= 131.6).
 
227
  }
228
 
229
 
230
+ @op("Muegge filter", outputs=["pass", "fail"])
231
  def muegge_filter(bundle, *, table_name: str, column_name: str, strict_muegge: bool = True):
232
  """
233
  Apply the Muegge filter:
 
269
  }
270
 
271
 
272
+ @op("Brenk Aggregator filter", outputs=["pass", "fail"])
273
  def brenk_aggregator_filter(
274
  bundle, *, table_name: str, column_name: str, strict_brenk: bool = True
275
  ):
 
300
  }
301
 
302
 
303
+ @op("View mol image", view="matplotlib", slow=True)
304
  def mol_image(bundle, *, table_name: str, smiles_column: str, mols_per_row: int):
305
  df = bundle.dfs[table_name].copy()
306
  df["mol"] = df[smiles_column].apply(Chem.MolFromSmiles)
 
324
 
325
  # --- draw each molecule into its own sub‐image and paste ---
326
  images = []
327
+ for mol, legend in zip(mols, legends[:MAX_IMAGE_COUNT]):
328
  # draw one molecule
329
  drawer = rdMolDraw2D.MolDraw2DCairo(400, 350)
330
  opts = drawer.drawOptions()
 
350
  plt.tight_layout()
351
 
352
 
353
+ @op("Train QSAR model")
354
  def build_qsar_model(
355
  bundle,
356
  *,
 
493
  # --- End of predict_with_ci definition ---
494
 
495
 
496
+ @op("Train QSAR2")
497
  def build_qsar_model2(
498
  df: pd.DataFrame,
499
  *,
 
675
  return results_df
676
 
677
 
678
+ @op("Plot QSAR", view="matplotlib")
679
  def plot_qsar(results_df: pd.DataFrame):
680
  """
681
  Plots actual vs. predicted values from a QSAR results DataFrame.
 
865
  )
866
 
867
 
868
+ @op("Plot QSAR2", view="matplotlib")
869
  def plot_qsar2(results_df: pd.DataFrame):
870
  """
871
  Plots actual vs. predicted values resembling the example image.
examples/Cheminformatics/draw_molecules.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from lynxkite_core.ops import op
2
+ from lynxkite_graph_analytics.core import Bundle, TableName, ColumnNameByTableName
3
+ import base64
4
+ import io
5
+
6
+
7
def pil_to_data(image, image_format: str = "webp") -> str:
    """Encode an image as a base64 ``data:`` URI.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=...)`` method.
        image_format: Format name passed to ``image.save``. Its lower-cased
            form is also used to build the ``image/<format>`` MIME type.
            Defaults to ``"webp"``, the format that used to be hard-coded.

    Returns:
        A string of the form ``data:image/<format>;base64,<payload>``.
    """
    buffer = io.BytesIO()
    image.save(buffer, format=image_format)
    # Base64 output is pure ASCII, so decoding cannot fail.
    payload = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"data:image/{image_format.lower()};base64,{payload}"
12
+
13
+
14
def smiles_to_data(smiles):
    """Render a SMILES string as a base64 image ``data:`` URI.

    Args:
        smiles: SMILES text describing one molecule.

    Returns:
        The URI produced by ``pil_to_data`` for the drawn molecule, or
        ``None`` when ``smiles`` is not a string or RDKit cannot parse it.
    """
    # Non-string cells (such as None or NaN for missing values) are treated
    # like unparsable SMILES instead of raising inside RDKit.
    if not isinstance(smiles, str):
        return None

    # RDKit is imported lazily, as before. The submodules are imported
    # explicitly because a bare ``import rdkit`` does not reliably make
    # ``rdkit.Chem`` or ``rdkit.Chem.Draw`` available as attributes.
    from rdkit import Chem
    from rdkit.Chem import Draw

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    return pil_to_data(Draw.MolToImage(mol))
23
+
24
+
25
@op("LynxKite Graph Analytics", "Draw molecules", icon="microscope-filled")
def draw_molecules(
    b: Bundle,
    *,
    table_name: TableName,
    smiles_column: ColumnNameByTableName,
    image_column: str = "image",
):
    """Adds a column of molecule images to a table.

    Works on copies of the bundle and of the selected table; the column
    named by ``image_column`` is filled by applying ``smiles_to_data`` to
    every value of ``smiles_column``.

    Args:
        b: Bundle holding the table to extend.
        table_name: Key of the table inside ``b.dfs``.
        smiles_column: Column of that table containing SMILES strings.
        image_column: Name of the column that receives the images.

    Returns:
        The copied bundle with the updated table stored under ``table_name``.
    """
    result = b.copy()
    table = result.dfs[table_name].copy()
    table[image_column] = table[smiles_column].apply(smiles_to_data)
    result.dfs[table_name] = table
    return result
examples/Cheminformatics/rcsb_api.py CHANGED
@@ -2,68 +2,12 @@ import pandas as pd
2
  import matplotlib.pyplot as plt
3
  import pypdb
4
  import biotite.database.rcsb as rcsb
5
- from MDAnalysis.analysis import rms
6
- from opencadd.structure.superposition.engines.mda import MDAnalysisAligner
7
  from lynxkite_core.ops import op
8
  import os
9
  import numpy as np
10
  from Bio.PDB import PDBList, PDBParser, Superimposer
11
 
12
 
13
- def calc_rmsd(A, B):
14
- """
15
- Calculate RMSD between two structures.
16
-
17
- Parameters
18
- ----------
19
- A : opencadd.structure.core.Structure
20
- Structure A.
21
- B : opencadd.structure.core.Structure
22
- Structure B.
23
-
24
- Returns
25
- -------
26
- float
27
- RMSD value.
28
- """
29
- aligner = MDAnalysisAligner()
30
- selection, _ = aligner.matching_selection(A, B)
31
- A = A.select_atoms(selection["reference"])
32
- B = B.select_atoms(selection["mobile"])
33
- return rms.rmsd(A.positions, B.positions, superposition=False)
34
-
35
-
36
- def calc_rmsd_matrix(structures, names):
37
- """
38
- Calculate RMSD matrix between a list of structures.
39
-
40
- Parameters
41
- ----------
42
- structures : list of opencadd.structure.core.Structure
43
- List of structures.
44
- names : list of str
45
- List of structure names.
46
-
47
- Returns
48
- -------
49
- pandas.DataFrame
50
- RMSD matrix.
51
- """
52
- values = {name: {} for name in names}
53
- for i, (A, name_i) in enumerate(zip(structures, names)):
54
- for j, (B, name_j) in enumerate(zip(structures, names)):
55
- if i == j:
56
- values[name_i][name_j] = 0.0
57
- continue
58
- if i < j:
59
- rmsd = calc_rmsd(A, B)
60
- values[name_i][name_j] = rmsd
61
- values[name_j][name_i] = rmsd
62
- continue
63
- df = pd.DataFrame.from_dict(values)
64
- return df
65
-
66
-
67
  @op("LynxKite Graph Analytics", "PDB composite search")
68
  def get_pdb_count(
69
  *, ligand_id: str, experimental_method: str, max_resolution: float, polymer_count: int
 
2
  import matplotlib.pyplot as plt
3
  import pypdb
4
  import biotite.database.rcsb as rcsb
 
 
5
  from lynxkite_core.ops import op
6
  import os
7
  import numpy as np
8
  from Bio.PDB import PDBList, PDBParser, Superimposer
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  @op("LynxKite Graph Analytics", "PDB composite search")
12
  def get_pdb_count(
13
  *, ligand_id: str, experimental_method: str, max_resolution: float, polymer_count: int
examples/Cheminformatics/requirements.txt CHANGED
@@ -1,10 +1,9 @@
 
1
  biotite
2
  pypdb
3
  rdkit
4
  MDAnalysis
5
  redo
6
- https://github.com/volkamerlab/opencadd/archive/master.tar.gz
7
  seaborn
8
- =======
9
  chembl_webresource_client
10
  rcsb-api
 
1
+ biopython
2
  biotite
3
  pypdb
4
  rdkit
5
  MDAnalysis
6
  redo
 
7
  seaborn
 
8
  chembl_webresource_client
9
  rcsb-api
uv.lock CHANGED
@@ -2551,7 +2551,7 @@ requires-dist = [
2551
  { name = "scanpy", specifier = ">=1.11.3" },
2552
  { name = "scikit-learn", specifier = ">=1.7.1" },
2553
  { name = "scipy", specifier = ">=1.15.2" },
2554
- { name = "torch", specifier = ">=2.7.0" },
2555
  ]
2556
 
2557
  [[package]]
 
2551
  { name = "scanpy", specifier = ">=1.11.3" },
2552
  { name = "scikit-learn", specifier = ">=1.7.1" },
2553
  { name = "scipy", specifier = ">=1.15.2" },
2554
+ { name = "torch", specifier = ">=2.7.0", index = "https://pypi.org/simple" },
2555
  ]
2556
 
2557
  [[package]]