Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -656,24 +656,38 @@ def first_node(state: State) -> State:
|
|
| 656 |
query_chembl = None
|
| 657 |
state["query_chembl"] = query_chembl
|
| 658 |
|
| 659 |
-
prompt = f'
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
res = chat_model.invoke(prompt)
|
| 678 |
|
| 679 |
tool_choices = str(res).split('<|assistant|>')[1].split('#')[0].strip()
|
|
@@ -719,33 +733,48 @@ def retry_node(state: State) -> State:
|
|
| 719 |
query_pdb = state["query_pdb"]
|
| 720 |
query_smiles = state["query_smiles"]
|
| 721 |
|
| 722 |
-
prompt = f'
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 749 |
|
| 750 |
res = chat_model.invoke(prompt)
|
| 751 |
|
|
@@ -978,13 +1007,19 @@ graph = builder.compile()
|
|
| 978 |
@spaces.GPU
|
| 979 |
def ProteinAgent(task, protein, up_id, chembl_id, pdb_id, smiles):
|
| 980 |
'''
|
| 981 |
-
This Agent can perform several protein-related tasks.
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 988 |
|
| 989 |
Args:
|
| 990 |
task: the task to carry out
|
|
|
|
| 656 |
query_chembl = None
|
| 657 |
state["query_chembl"] = query_chembl
|
| 658 |
|
| 659 |
+
prompt = f'''
|
| 660 |
+
# For the QUERY_TASK given below, determine if one or two of the tools descibed below
|
| 661 |
+
can complete the task. If so, reply with only the tool names followed by "#". If two tools
|
| 662 |
+
are required, reply with both tool names separated by a comma and followed by "#".
|
| 663 |
+
If the tools cannot complete the task, reply with "None #".
|
| 664 |
+
|
| 665 |
+
## QUERY_TASK: {query_task}.
|
| 666 |
+
|
| 667 |
+
## Tools:
|
| 668 |
+
|
| 669 |
+
- uniprot_tool: this tool takes in the user requested protein and searches UNIPROT for matches.
|
| 670 |
+
|
| 671 |
+
- It returns a string containing the uniprot protein ID, gene name, organism, and protein name.
|
| 672 |
+
|
| 673 |
+
- list_bioactives_tool: Accepts a given UNIPROT ID and searches for Chembl IDs and bioactive molecules.
|
| 674 |
+
Returns Chembl IDs and numbers of bioactive molecules.
|
| 675 |
+
|
| 676 |
+
- get_bioactives_tool: Accepts a Chembl ID and get all bioactives molecule SMILES and IC50s for that ID. Requires a
|
| 677 |
+
chembl ID, so the list_bioactives_tool should be called before this tool.
|
| 678 |
+
|
| 679 |
+
- pdb_tool: Accepts a PDB ID and queires the protein databank for the number of chains in and sequence of the
|
| 680 |
+
protein, as well as other information such as ligands in the structure.
|
| 681 |
+
|
| 682 |
+
- find_tool: Accepts a protein name and seaches for PDB IDs that match, returning the PDB ID and the title.
|
| 683 |
+
|
| 684 |
+
- predict_tool: Predicts the IC50 value for the molecule indicated by the SMILES string provided
|
| 685 |
+
Uses the LightGBM model for prediction. Requires a Chembl dataset, so the get_bioactives_tool should be called before this tool.
|
| 686 |
+
|
| 687 |
+
- gpt_tool: Uses a machine-learning GPT model to generate novel molecules for a chembl dataset. It returns a list
|
| 688 |
+
of novel molecules generated by the GPT and an image of the molecules. Requires a Chembl dataset, so the get_bioactives_tool
|
| 689 |
+
should be called before this tool.
|
| 690 |
+
'''
|
| 691 |
res = chat_model.invoke(prompt)
|
| 692 |
|
| 693 |
tool_choices = str(res).split('<|assistant|>')[1].split('#')[0].strip()
|
|
|
|
| 733 |
query_pdb = state["query_pdb"]
|
| 734 |
query_smiles = state["query_smiles"]
|
| 735 |
|
| 736 |
+
prompt = f'''
|
| 737 |
+
# You were previously given the QUERY_TASK below, and asked to determine if one
|
| 738 |
+
or two of the tools described below could complete the task. The tool choices did not succeed.
|
| 739 |
+
Please re-examine the tool choices and determine if one or two of the tools described below
|
| 740 |
+
can complete the task. If so, reply with only the tool names followed by "#". If two tools
|
| 741 |
+
are required, reply with both tool names separated by a comma and followed by "#".
|
| 742 |
+
If the tools cannot complete the task, reply with "None #".
|
| 743 |
+
|
| 744 |
+
## The information provided by the user is:
|
| 745 |
+
- QUERY_PROTEIN: {query_protein}.
|
| 746 |
+
- QUERY_UP_ID: {query_up_id}.
|
| 747 |
+
- QUERY_CHEMBL: {query_chembl}.
|
| 748 |
+
- QUERY_PDB: {query_pdb}.
|
| 749 |
+
- QUERY_SMILES: {query_smiles}.
|
| 750 |
+
|
| 751 |
+
## The task is:
|
| 752 |
+
- QUERY_TASK: {query_task}.
|
| 753 |
+
|
| 754 |
+
## Tools:
|
| 755 |
+
|
| 756 |
+
- uniprot_tool: this tool takes in the user requested protein and searches UNIPROT for matches.
|
| 757 |
+
|
| 758 |
+
- It returns a string containing the uniprot protein ID, gene name, organism, and protein name.
|
| 759 |
+
|
| 760 |
+
- list_bioactives_tool: Accepts a given UNIPROT ID and searches for Chembl IDs and bioactive molecules.
|
| 761 |
+
Returns Chembl IDs and numbers of bioactive molecules.
|
| 762 |
+
|
| 763 |
+
- get_bioactives_tool: Accepts a Chembl ID and get all bioactives molecule SMILES and IC50s for that ID. Requires a
|
| 764 |
+
chembl ID, so the list_bioactives_tool should be called before this tool.
|
| 765 |
+
|
| 766 |
+
- pdb_tool: Accepts a PDB ID and queires the protein databank for the number of chains in and sequence of the
|
| 767 |
+
protein, as well as other information such as ligands in the structure.
|
| 768 |
+
|
| 769 |
+
- find_tool: Accepts a protein name and seaches for PDB IDs that match, returning the PDB ID and the title.
|
| 770 |
+
|
| 771 |
+
- predict_tool: Predicts the IC50 value for the molecule indicated by the SMILES string provided
|
| 772 |
+
Uses the LightGBM model for prediction. Requires a Chembl dataset, so the get_bioactives_tool should be called before this tool.
|
| 773 |
+
|
| 774 |
+
- gpt_tool: Uses a machine-learning GPT model to generate novel molecules for a chembl dataset. It returns a list
|
| 775 |
+
of novel molecules generated by the GPT and an image of the molecules. Requires a Chembl dataset, so the get_bioactives_tool
|
| 776 |
+
should be called before this tool.
|
| 777 |
+
'''
|
| 778 |
|
| 779 |
res = chat_model.invoke(prompt)
|
| 780 |
|
|
|
|
| 1007 |
@spaces.GPU
|
| 1008 |
def ProteinAgent(task, protein, up_id, chembl_id, pdb_id, smiles):
|
| 1009 |
'''
|
| 1010 |
+
This Agent can perform several protein-related tasks.
|
| 1011 |
+
1. It can find UNIPROT IDs for a protein, or,
|
| 1012 |
+
2. given a UNIPROT ID it can find Chembl IDs that match.
|
| 1013 |
+
3. It can find numbers of and lists of bioactive molecules based on a Chembl ID.
|
| 1014 |
+
4. It can query the protein databank to find PDB IDs matching a protein name and return the IDs and titles.
|
| 1015 |
+
5. It can find a particular PDB ID and report information such as how many chains it contains,
|
| 1016 |
+
the sequence, and any small molecules or ligands bound in the structure.
|
| 1017 |
+
6. It can predict the IC50 value of a molecule based on a Chembl dataset using the LightGBM model.
|
| 1018 |
+
7. It can generate novel molecules using a finetuned GPT based on a Chembl dataset.
|
| 1019 |
+
|
| 1020 |
+
If Task 6 or 7 are to be called, a chembl dataset is needed. If a Chembl ID is not provided, then task 2 should be called
|
| 1021 |
+
first to find chembl IDs, then task 3 should be called to collect the dataset based on the ID. If a chembl ID is provided,
|
| 1022 |
+
then task 3 should be called to collect the chembl dataset.
|
| 1023 |
|
| 1024 |
Args:
|
| 1025 |
task: the task to carry out
|