Spaces:
Runtime error
Runtime error
TimS committed on
Commit ·
b18cdf9
1
Parent(s): d81bc91
init commit
Browse files- .gitignore +96 -0
- README.md +9 -7
- app.py +133 -4
- packages.txt +3 -0
- requirements.txt +24 -0
.gitignore
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data/
|
| 2 |
+
checkpoint/
|
| 3 |
+
|
| 4 |
+
*.err
|
| 5 |
+
*.out
|
| 6 |
+
|
| 7 |
+
.DS_Store
|
| 8 |
+
.idea
|
| 9 |
+
.vscode
|
| 10 |
+
.vscode/
|
| 11 |
+
*.bin
|
| 12 |
+
*.pkl
|
| 13 |
+
*.pt
|
| 14 |
+
*.pyc
|
| 15 |
+
*.bk
|
| 16 |
+
*.vim
|
| 17 |
+
tokenizer/temp*.json
|
| 18 |
+
|
| 19 |
+
replace.sh
|
| 20 |
+
|
| 21 |
+
Quick*.*
|
| 22 |
+
Temp*.*
|
| 23 |
+
|
| 24 |
+
*.env
|
| 25 |
+
cov.xml
|
| 26 |
+
|
| 27 |
+
.ipynb_checkpoints/
|
| 28 |
+
|
| 29 |
+
*.py[cod]
|
| 30 |
+
|
| 31 |
+
# C extensions
|
| 32 |
+
*.so
|
| 33 |
+
|
| 34 |
+
# Packages
|
| 35 |
+
*.egg
|
| 36 |
+
*.egg-info
|
| 37 |
+
dist
|
| 38 |
+
build
|
| 39 |
+
eggs
|
| 40 |
+
parts
|
| 41 |
+
bin
|
| 42 |
+
var
|
| 43 |
+
sdist
|
| 44 |
+
develop-eggs
|
| 45 |
+
.installed.cfg
|
| 46 |
+
lib
|
| 47 |
+
lib64
|
| 48 |
+
|
| 49 |
+
# Installer logs
|
| 50 |
+
pip-log.txt
|
| 51 |
+
|
| 52 |
+
# Unit test / coverage reports
|
| 53 |
+
.coverage*
|
| 54 |
+
.tox
|
| 55 |
+
nosetests.xml
|
| 56 |
+
htmlcov
|
| 57 |
+
|
| 58 |
+
# Translations
|
| 59 |
+
*.mo
|
| 60 |
+
|
| 61 |
+
# Mr Developer
|
| 62 |
+
.mr.developer.cfg
|
| 63 |
+
.project
|
| 64 |
+
.pydevproject
|
| 65 |
+
|
| 66 |
+
# Complexity
|
| 67 |
+
output/*.html
|
| 68 |
+
output/*/index.html
|
| 69 |
+
|
| 70 |
+
# Sphinx
|
| 71 |
+
docs/_build
|
| 72 |
+
|
| 73 |
+
MANIFEST
|
| 74 |
+
|
| 75 |
+
*.tif
|
| 76 |
+
|
| 77 |
+
# Rever
|
| 78 |
+
rever/
|
| 79 |
+
|
| 80 |
+
# Dev notebooks
|
| 81 |
+
# notebooks/
|
| 82 |
+
|
| 83 |
+
# MkDocs
|
| 84 |
+
site/
|
| 85 |
+
|
| 86 |
+
.vscode
|
| 87 |
+
|
| 88 |
+
.idea/
|
| 89 |
+
|
| 90 |
+
data/
|
| 91 |
+
output/
|
| 92 |
+
wandb/
|
| 93 |
+
oracle/
|
| 94 |
+
expts/models/
|
| 95 |
+
expts/dev-data/
|
| 96 |
+
expts/notebooks/
|
README.md
CHANGED
|
@@ -1,13 +1,15 @@
|
|
| 1 |
---
|
| 2 |
-
title: FragLlama
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
license:
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: FragLlama Demo
|
| 3 |
+
emoji: 🧪
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.0.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: other
|
| 11 |
+
short_description: Molecule Generation
|
| 12 |
---
|
| 13 |
|
| 14 |
+
<h1 align="center"> YDS FragLlama </h1>
|
| 15 |
+
A [Gradio](https://gradio.app) demo of FragLlama for molecule generation, with tabs for scaffold decoration, motif extension, and linker generation. When running as a Space, the model is loaded from the Hugging Face Hub (`YDS-Pharmatech/FragLlama-base`).
|
app.py
CHANGED
|
@@ -1,7 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
| 3 |
+
"""
|
| 4 |
+
# from huggingface_hub import InferenceClient
|
| 5 |
+
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
| 6 |
+
import os
|
| 7 |
+
import argparse
|
| 8 |
+
import warnings
|
| 9 |
import gradio as gr
|
| 10 |
+
import datamol as dm
|
| 11 |
+
from rdkit import RDLogger
|
| 12 |
+
# Process-wide settings applied before the model code is imported.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings("ignore", message="DEPRECATION WARNING: please use MorganGenerator")
RDLogger.DisableLog('rdApp.*')

# Hub repository used for both the model weights and the tokenizer when
# running as a Space.
HF_MODEL = "YDS-Pharmatech/FragLlama-base"

# os.getenv returns a string, so a plain truthiness test would treat
# HF_SPACE=0 or HF_SPACE=false as enabled. Parse it into a real boolean:
# unset, empty, or an explicit "off" spelling means False; anything else True.
HF_SPACE = os.getenv('HF_SPACE', '').strip().lower() not in ("", "0", "false", "no", "off")
# if HF_SPACE: os.chdir("/data/fraglm")
|
| 19 |
|
| 20 |
+
from fraglm.inference import FragLMDesign
|
| 21 |
+
from fraglm.utils import *; from boring_utils.utils import *
|
| 22 |
+
from fraglm.trainer.model import FragLMLlamaModel
|
| 23 |
+
from fraglm.inference.post_processing import PostProcessMode
|
| 24 |
+
device = get_device()
|
| 25 |
+
|
| 26 |
+
# Command-line options; the paths are only used on the local (non-Space)
# branch below.
parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str, default="", help='Path to the model')
parser.add_argument('--tokenizer_path', type=str, default="tokenizer/fraglm_2406_bpe_8k.json", help='Path to the tokenizer')
args = parser.parse_args()

if HF_SPACE:
    # Space deployment: weights and tokenizer both come from the HF_MODEL
    # repository, authenticated with the HF_TOKEN environment variable.
    model = FragLMLlamaModel.from_pretrained(HF_MODEL, token=os.getenv('HF_TOKEN')).to(device)
    designer = FragLMDesign(model=model, tokenizer=HF_MODEL)
else:
    # Local run: weights and tokenizer come from the CLI-provided paths.
    model = FragLMLlamaModel.from_pretrained(args.model_path).to(device)
    designer = FragLMDesign(model=model, tokenizer=args.tokenizer_path)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def scaffold_decoration(scaffold, n_samples, n_trials):
    """Run scaffold decoration and render the generated molecules.

    Args:
        scaffold: Scaffold string from the UI text box. It is passed to the
            designer and also parsed with ``dm.from_smarts`` to serve as the
            highlight pattern.
        n_samples: Number of samples requested per trial.
        n_trials: Number of trials to run.

    Returns:
        The image returned by ``dm.viz.lasso_highlight_image`` for the
        generated molecules.
    """
    # Gradio's Number component delivers floats unless a precision is set,
    # so normalise the counts to ints before handing them to the designer.
    generated_smiles = designer.scaffold_decoration(
        scaffold=scaffold,
        n_samples_per_trial=int(n_samples),
        n_trials=int(n_trials),
        sanitize=True,
        do_not_fragment_further=True,
    )
    generated_mols = [dm.to_mol(smiles) for smiles in generated_smiles]

    img = dm.viz.lasso_highlight_image(
        generated_mols,
        dm.from_smarts(scaffold),
        mol_size=(350, 200),
        color_list=["#ff80b5"],
        scale_padding=0.1,
        use_svg=False,
    )
    return img
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def motif_extension(motif, n_samples, n_trials, min_length, max_length):
    """Run motif extension and render the generated molecules.

    Args:
        motif: Motif string from the UI text box.
        n_samples: Number of samples requested per trial.
        n_trials: Number of trials to run.
        min_length: Minimum length forwarded to the designer.
        max_length: Maximum length forwarded to the designer.

    Returns:
        The image returned by ``dm.to_image`` for the generated SMILES.
    """
    # Gradio's Number component delivers floats unless a precision is set,
    # so normalise counts and lengths to ints before calling the designer.
    generated_smiles = designer.motif_extension(
        motif=motif,
        n_samples_per_trial=int(n_samples),
        n_trials=int(n_trials),
        sanitize=True,
        do_not_fragment_further=False,
        min_length=int(min_length),
        max_length=int(max_length),
    )
    img = dm.to_image(
        generated_smiles,
        mol_size=(350, 200),
        use_svg=False,
    )
    return img
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def linker_generation(linker1, linker2, n_samples, n_trials):
    """Generate molecules from two input fragments and render them.

    Args:
        linker1: First fragment string from the UI.
        linker2: Second fragment string from the UI.
        n_samples: Number of samples requested per trial.
        n_trials: Number of trials to run.

    Returns:
        The image returned by ``dm.to_image`` for the generated SMILES.
    """
    # The two fragments are passed positionally. The previous intermediate
    # list was named after this function and shadowed it inside the body.
    # Counts are coerced to int because Gradio's Number component delivers
    # floats unless a precision is set.
    generated_smiles = designer.linker_generation(
        linker1,
        linker2,
        n_samples_per_trial=int(n_samples),
        n_trials=int(n_trials),
        sanitize=True,
        do_not_fragment_further=False,
        random_seed=100,
        post_process_mode=PostProcessMode.DO_NOTHING,
    )
    img = dm.to_image(
        generated_smiles,
        mol_size=(350, 200),
        use_svg=False,
    )
    return img
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# Gradio UI: one tab per generation mode, each wiring its inputs to the
# matching callback and showing the returned image.
# Number inputs that represent counts or lengths use precision=0 so the
# callbacks receive ints rather than floats.
with gr.Blocks() as demo:
    gr.Markdown("# FragLlama Demo")

    with gr.Tab("Scaffold Decoration"):
        scaffold_input = gr.Textbox(label="Scaffold SMILES")
        scaffold_n_samples = gr.Slider(1, 20, 12, step=1, label="Samples per Trial")
        scaffold_n_trials = gr.Number(value=2, precision=0, label="Number of Trials")
        scaffold_button = gr.Button("Generate")
        scaffold_output = gr.Image(type="pil", label="Generated Molecules")
        scaffold_button.click(
            scaffold_decoration,
            inputs=[scaffold_input, scaffold_n_samples, scaffold_n_trials],
            outputs=scaffold_output,
        )

    with gr.Tab("Motif Extension"):
        motif_input = gr.Textbox(label="Motif SMILES")
        motif_n_samples = gr.Slider(1, 20, 12, step=1, label="Samples per Trial")
        motif_n_trials = gr.Number(value=1, precision=0, label="Number of Trials")
        motif_min_length = gr.Number(value=25, precision=0, label="Min Length")
        motif_max_length = gr.Number(value=80, precision=0, label="Max Length")
        motif_button = gr.Button("Generate")
        motif_output = gr.Image(type="pil", label="Generated Molecules")
        motif_button.click(
            motif_extension,
            inputs=[motif_input, motif_n_samples, motif_n_trials, motif_min_length, motif_max_length],
            outputs=motif_output,
        )

    with gr.Tab("Linker Generation"):
        linker1_input = gr.Textbox(label="Linker 1 SMILES")
        linker2_input = gr.Textbox(label="Linker 2 SMILES")
        linker_n_samples = gr.Slider(1, 20, 12, step=1, label="Samples per Trial")
        linker_n_trials = gr.Number(value=1, precision=0, label="Number of Trials")
        linker_button = gr.Button("Generate")
        linker_output = gr.Image(type="pil", label="Generated Molecules")
        linker_button.click(
            linker_generation,
            inputs=[linker1_input, linker2_input, linker_n_samples, linker_n_trials],
            outputs=linker_output,
        )


if __name__ == "__main__":
    demo.launch()
|
packages.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tmux
|
| 2 |
+
neovim
|
| 3 |
+
ranger
|
requirements.txt
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tqdm
|
| 2 |
+
loguru
|
| 3 |
+
typer
|
| 4 |
+
universal_pathlib
|
| 5 |
+
datamol
|
| 6 |
+
pandas
|
| 7 |
+
numpy
|
| 8 |
+
torch>=2.0 --extra-index-url https://download.pytorch.org/whl/cu113
|
| 9 |
+
transformers
|
| 10 |
+
datasets
|
| 11 |
+
tokenizers
|
| 12 |
+
accelerate>=0.28.0
|
| 13 |
+
evaluate
|
| 14 |
+
wandb
|
| 15 |
+
huggingface_hub
|
| 16 |
+
deepspeed
|
| 17 |
+
ruff
|
| 18 |
+
nbconvert
|
| 19 |
+
jupyterlab
|
| 20 |
+
ipywidgets
|
| 21 |
+
s3cmd
|
| 22 |
+
git+https://github.com/TimS-ml/Boring-Utils.git
|
| 23 |
+
-e /data/fraglm
|
| 24 |
+
--no-deps
|