File size: 12,157 Bytes
795bf6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aff0835
fae976e
 
 
4573299
 
 
795bf6b
 
 
 
 
 
 
aff0835
 
 
4573299
 
 
795bf6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4573299
795bf6b
8b870a7
795bf6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4573299
 
 
c451855
795bf6b
 
 
a12377e
 
795bf6b
 
 
 
 
 
 
 
 
4573299
 
 
 
 
 
 
 
795bf6b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
import openai
import os
from openai import OpenAI
import gradio as gr 
from elevenlabs.client import ElevenLabs
from elevenlabs import stream
import base64

# Credentials come from the environment; an unset variable yields None.
api_key = os.environ.get("OPENAI_API_KEY")
eleven_key = os.environ.get("eleven_key")

# Module-level API clients: ElevenLabs for text-to-speech, OpenAI for chat.
elevenlabs = ElevenLabs(api_key=eleven_key)
client = OpenAI(api_key=api_key)

# MCP tool descriptors handed to the model, one per hosted agent.
# Every entry has the same shape; only the agent name varies:
#   label  -> "cafiero-<agent lower-cased>"
#   url    -> "https://cafierom-<agent lower-cased>.hf.space/gradio_api/mcp/"
#   tool   -> "<Agent>_<Agent>"
cafchem_tools = [
    {
        "type": "mcp",
        "server_label": f"cafiero-{agent.lower()}",
        "server_url": f"https://cafierom-{agent.lower()}.hf.space/gradio_api/mcp/",
        "require_approval": "never",
        "allowed_tools": [f"{agent}_{agent}"],
    }
    for agent in ("ProteinAgent", "PropAgent", "MoleculeAgent", "DockAgent")
]

# Conversation state kept at module level, so it is shared by every call to
# chat() in this process.
# chat_history: list of {"role", "content"} messages returned to the chatbot.
# last_id: id of the most recent model response, or None before the first
#          turn / after clear_history(); chat() passes it back as
#          previous_response_id.
# (A `global` statement is only meaningful inside a function, so none is
# needed here.)
chat_history = []
last_id = None

def chat(prompt, tools, voice_choice):
    """Send one user turn to the model and return the updated chat state.

    Args:
        prompt: Text entered by the user.
        tools: "Yes" to send the drug-design instructions and the CafChem MCP
            tool list with the request; any other value sends a plain request.
        voice_choice: "On" to synthesize the reply with ElevenLabs; any other
            value returns no audio.

    Returns:
        A 3-tuple ``("", chat_history, audio_player)``: an empty string (the
        first output slot), the module-level message list with the new user
        and assistant messages appended, and either an HTML ``<audio>`` element
        with the spoken reply embedded as base64 or ``''``.

    Side effects:
        Appends to the module-level ``chat_history`` and stores the response id
        in the module-level ``last_id`` so the next call continues the same
        conversation.
    """
    global last_id

    # Nothing to send: skip the API call and leave the conversation untouched.
    if not prompt or not prompt.strip():
        return "", chat_history, ''

    chat_history.append({"role": "user", "content": prompt})

    # Build the request once; optional fields are added only when they apply.
    request = {"model": "o4-mini", "input": prompt}
    if tools == "Yes":
        # The prompt text is kept byte-for-byte: each backslash continuation
        # keeps the following line's leading spaces inside the string.
        request["instructions"] = 'You are a drug design assistant. Use tools provided for information; do not insert \
                        your own knowledge if the information can be obtained from a tool. When calling a tool give complete \
                        sentences for the task parameter. For example, do not define the task as "fetch smiles", rather, say: \
                        "find the smiles string for the molecule". If you need to call the ProteinAgent_ProteinAgent tool for \
                        either IC50 prediction or novel molecule generation, then you need to call have the tool get bioactive \
                        molecules first, so that the correct dataset can be obtained.'
        request["tools"] = cafchem_tools
    if last_id is not None:
        # Chain onto the previous response so the model sees earlier turns.
        request["previous_response_id"] = last_id

    response = client.responses.create(**request)
    reply_text = response.output_text

    chat_history.append({"role": "assistant", "content": reply_text})
    last_id = response.id

    if voice_choice != "On":
        return "", chat_history, ''

    voice_settings = {
        "stability": 0.37,
        "similarity_boost": 0.90,
        "style": 0.0,
        "speed": 1.05,
    }
    audio_stream = elevenlabs.text_to_speech.convert(
        text=reply_text,
        voice_id='vxO9F6g9yqYJ4RsWvMbc',
        model_id='eleven_multilingual_v2',
        output_format='mp3_44100_128',
        voice_settings=voice_settings,
    )

    # The converter yields byte chunks; join them and inline the clip as a
    # base64 data URI so the browser can play it without a file URL.
    audio = base64.b64encode(b"".join(audio_stream)).decode("utf-8")
    audio_player = f'<audio src="data:audio/mpeg;base64,{audio}" controls autoplay></audio>'
    return "", chat_history, audio_player

def clear_history():
    """Reset the module-level conversation state.

    Rebinds ``chat_history`` to a new empty list and ``last_id`` to ``None``.
    Returns nothing.
    """
    global chat_history, last_id
    chat_history, last_id = [], None

def voice_from_file(file_name):
    """Build an autoplaying HTML audio element from an audio file on disk.

    Args:
        file_name: Path of the file to read (opened in binary mode).

    Returns:
        An ``<audio>`` tag whose ``src`` is a ``data:audio/mpeg`` URI holding
        the file's bytes encoded as base64.
    """
    with open(file_name, 'rb') as clip:
        raw_bytes = clip.read()
    encoded = base64.b64encode(raw_bytes).decode("utf-8")
    return f'<audio src="data:audio/mpeg;base64,{encoded}" controls autoplay></audio>'
    
def prot_workflow():
    """Return the sample protein workflow hint with its audio clip.

    Returns:
        tuple: (audio player HTML built from 'protein_chat.mp3', a list with a
        single assistant message containing the hint text).
    """
    hint = (
        "Starting with a protein, try searching for Uniprot IDs, followed by Chembl IDs. "
        "Then you can look for bioactive molecules for each Chembl ID. You can also search for crystal structures "
        "in the PDB and get titles of those structures, sequences, numbers of chains, and small molecules in the structure. "
        "Generate novel bioactive molecules based on a protein Chembl ID using a GPT, or predict an IC50 for a molecule "
        "based on a protein Chembl ID using a gradient-boosting model."
    )
    history = [dict(role='assistant', content=hint)]
    return voice_from_file('protein_chat.mp3'), history

def mol_workflow():
    """Return the sample molecule workflow hint with its audio clip.

    Returns:
        tuple: (audio player HTML built from 'mol_wf.mp3', a list with a single
        assistant message containing the hint text).
    """
    hint = (
        "Starting with a molecule, try finding it's Lipinski properties, or its pharmacophore similarity to a known active. "
        "Find similar molecules or, if it has substituted rings, find analogues."
    )
    history = [dict(role='assistant', content=hint)]
    return voice_from_file('mol_wf.mp3'), history

def combo_workflow():
    """Return the sample protein-plus-molecule workflow hint with its audio clip.

    Returns:
        tuple: (audio player HTML built from 'combo_wf.mp3', a list with a
        single assistant message containing the hint text).
    """
    hint = (
        "Starting with a protein and a molecule, try docking the molecule in the protein. If you have a Chembl ID, predict the IC50 "
        "value of the molecule in the protein."
    )
    history = [dict(role='assistant', content=hint)]
    return voice_from_file('combo_wf.mp3'), history

def prot_accordions():
    """Return example protein-agent queries with their audio clip.

    Returns:
        tuple: (audio player HTML built from 'protein.mp3', a list with a single
        assistant message containing the example queries).
    """
    examples = (
        'Try queries like: find UNIPROT IDs for the protein MAOB; find PDB IDs for MAOB; how many chains '
        'are in the PDB structure 4A7G; find PDB IDs matching the protein MAOB; list the bioactive molecules for the CHEMBL '
        'ID CHEMBL2039; dock the molecule CCCC(F) in the protein DRD2; predict the IC50 value for CCCC(F) based on the CHEMBL '
        'ID CHEMBL2039; or generate novel molecules based on the CHEMBL ID CHEMBL2039.'
    )
    history = [dict(role='assistant', content=examples)]
    return voice_from_file('protein.mp3'), history

def mol_accordions():
    """Return example molecule-agent queries with their audio clip.

    Returns:
        tuple: (audio player HTML built from 'mol.mp3', a list with a single
        assistant message containing the example queries).
    """
    examples = 'Try queries like: Find the name of CCCF, find the smiles for paracetamol, or find molecules similar to paracetamol.'
    history = [dict(role='assistant', content=examples)]
    return voice_from_file('mol.mp3'), history

def prop_accordions():
    """Return example property-agent queries with their audio clip.

    Returns:
        tuple: (audio player HTML built from 'Props.mp3', a list with a single
        assistant message containing the example queries).
    """
    # The second fragment starts with two spaces: they are part of the
    # existing message text and are kept as-is.
    examples = (
        'Try queries like: Find Lipinski properties for CCCF, find pharmacophore-similarity between '
        '  CCCF and CCCBr, or generate analogues of c1ccc(O)cc1.'
    )
    history = [dict(role='assistant', content=examples)]
    return voice_from_file('Props.mp3'), history

def dock_accordions():
    """Return an example docking-agent query with its audio clip.

    Returns:
        tuple: (audio player HTML built from 'Dock.mp3', a list with a single
        assistant message containing the example query).
    """
    examples = 'Try queries like: dock CCC(F) in the protein MAOB'
    history = [dict(role='assistant', content=examples)]
    return voice_from_file('Dock.mp3'), history

# Gradio UI. Components appear on the page in the order they are created
# below, so statement order in this block is part of the layout.
with gr.Blocks() as forest:
  # Page header; also the input of the greeting renderer at the end of the block.
  top = gr.Markdown(
      """
      # Chat with MoDrAg! Use the *Mo*dular *Dr*ug Design *Ag*ent's Drug design tools through OpenAI o4-mini using the Model Context Protocol! (MCP)
      - Currently using the tools below:
      """)
  # One collapsible panel per agent, all closed initially. Expanding a panel
  # triggers the matching *_accordions handler (wired further down).
  with gr.Row():
    with gr.Accordion("Protein Agent - Click to open/close.", open=False) as prot:
      gr.Markdown('''
                  - Find Uniprot IDs for a protein/gene name.
                  - report the number of bioactive molecules for a protein, organized by Chembl ID.
                  - report the SMILES and IC50 values of bioactive molecules for a particular Chembl ID.
                  - find protein sequences, report number of chains.
                  - find small molecules present in a PDB structure.
                  - find PDB IDs that match a protein.
                  - predict the IC50 value of a small molecule based on a Chembl ID.
                  - generate novel molecules based on a Chembl ID.
      ''')
    with gr.Accordion("Molecule Agent - Click to open/close.", open=False) as mol:
      gr.Markdown('''
                  - find the name of a molecule from the SMILES string.
                  - find the SMILES string of a molecule from the name
                  - find similar or related molecules with some basic properties from a name or SMILES.
      ''')
    with gr.Accordion("Property Agent - Click to open/close.", open=False) as prop:
      gr.Markdown('''
                  - calculate Lipinski properties from a SMILES string.
                  - find the pharmacophore-similarity between two molecules (a molecule and a reference).
                  - generate analogues of ring molecules and report their QED values.
      ''')
    with gr.Accordion("Docking Agent - Click to open/close.", open=False) as dock:
      gr.Markdown('''
                  - Find the docking score and pose coordinates for a molecule defined by a SMILES string in one of the proteins below:
                  - IGF1R,JAK2,KIT,LCK,MAPK14,MAPKAPK2,MET,PTK2,PTPN1,SRC,ABL1,AKT1,AKT2,CDK2,CSF1R,EGFR,KDR,MAPK1,FGFR1,ROCK1,MAP2K1,
                  PLK1,HSD11B1,PARP1,PDE5A,PTGS2,ACHE,MAOB,CA2,GBA,HMGCR,NOS1,REN,DHFR,ESR1,ESR2,NR3C1,PGR,PPARA,PPARD,PPARG,AR,THRB,
                  ADAM17,F10,F2,BACE1,CASP3,MMP13,DPP4,ADRB1,ADRB2,DRD2,DRD3,ADORA2A,CYP2C9,CYP3A4,HSP90AA1
        ''')
  # Buttons that load a canned workflow hint into the chat.
  with gr.Row():
    molecule_workflow = gr.Button(value = "Sample Molecule Workflow")
    protein_workflow = gr.Button(value = "Sample Protein Workflow")
    combined_workflow = gr.Button(value = "Sample Combined Workflow")

  # Per-request switches passed to chat(): tool use and spoken replies.
  with gr.Row():
    tools = gr.Radio(choices = ["Yes", "No"],label="Use CafChem tools?",interactive=True, value = "Yes", scale = 2)
    voice_choice = gr.Radio(choices = ['On', 'Off'],label="Audio Voice Response?", interactive=True, value='Off', scale = 2)

  chatbot = gr.Chatbot()

  msg = gr.Textbox(label="Type your messages here and hit enter.", scale = 2)
  chat_btn = gr.Button(value = "Send", scale = 0)
  # Holds the <audio> element returned by chat() and the hint handlers.
  talk_ele = gr.HTML()

  # Clears the textbox and chatbot; clear_history (wired below) resets the
  # module-level conversation state as well.
  clear = gr.ClearButton([msg, chatbot])

  # Both the Send button and pressing enter in the textbox submit a chat turn.
  chat_btn.click(chat, [msg, tools, voice_choice], [msg, chatbot, talk_ele])
  msg.submit(chat, [msg, tools, voice_choice], [msg, chatbot, talk_ele])
  # Hint handlers return (audio player HTML, messages) for talk_ele and chatbot.
  mol.expand(mol_accordions, outputs = [talk_ele, chatbot])
  prop.expand(prop_accordions, outputs = [talk_ele, chatbot])
  prot.expand(prot_accordions, outputs = [talk_ele, chatbot])
  dock.expand(dock_accordions, outputs = [talk_ele, chatbot])
  molecule_workflow.click(mol_workflow, outputs = [talk_ele, chatbot])
  protein_workflow.click(prot_workflow, outputs = [talk_ele, chatbot])
  combined_workflow.click(combo_workflow, outputs = [talk_ele, chatbot])
  clear.click(clear_history)

  # Greeting clip. gr.render calls get_speech with the value of `top`
  # (presumably on page load and whenever `top` changes - see Gradio's render
  # docs); the HTML component created here is rendered at this position.
  @gr.render(inputs=top)
  def get_speech(args):
    # Reuse the shared helper instead of repeating the read/encode/format steps.
    gr.HTML(voice_from_file('MoDrAg_hello.mp3'))

if __name__ == "__main__":
    # Launch the Gradio app only when this file is run as a script.
    # share=True asks Gradio for a public share link (see Gradio's launch() docs).
    forest.launch(debug=False, share=True)