PhDFlo committed on
Commit
ac06ce3
·
1 Parent(s): c917fbf

update from 10/06/2025

Browse files
Files changed (2) hide show
  1. app.py +144 -142
  2. introduction_page.md +106 -0
app.py CHANGED
@@ -63,34 +63,39 @@ def select_best_model(
63
 
64
  # Definition of the tools for the MCP server
65
  # Function to return a fasta file
66
- def create_fasta_file(sequence: str, name: Optional[str] = None, seq_name: Optional[str] = None) -> str:
67
  """Create a FASTA file from a protein sequence string with a unique name.
68
 
69
  Args:
70
- sequence (str): The protein sequence string with optional line breaks
71
- name (str, optional): Name to use for the FASATA file. If not provided, a unique ID will be generated
72
- seq_name (str, optional): The name/identifier for the sequence. Defaults to "PROTEIN"
73
 
74
 
75
  Returns:
76
  str: Name of the created FASTA file
77
  """
 
 
 
 
 
78
  # Remove any trailing/leading whitespace but preserve line breaks
79
- lines = sequence.strip().split('\n')
80
 
81
  # Check if the first line is a FASTA header
82
  if not lines[0].startswith('>'):
83
  # If no header provided, add one
84
  if seq_name is None:
85
- seq_name = "PROTEIN"
86
- sequence = f">{seq_name}\n{sequence}"
87
 
88
  # Create FASTA content (preserving line breaks)
89
- fasta_content = sequence
90
 
91
  # Generate a unique file name
92
  unique_id = hashlib.sha256(uuid4().bytes).hexdigest()[:8]
93
- file_name = f"chai1_{name if name else unique_id}_input.fasta"
94
  file_path = here / "inputs/fasta" / file_name
95
 
96
  # Write the FASTA file
@@ -113,7 +118,7 @@ def create_json_config(
113
  num_trunk_recycles (int): Number of trunk recycles from slider
114
  seed (int): Random seed from slider
115
  options (list): List of selected options from checkbox group
116
- name (str, optional): Name to use for the config file. If not provided, a unique ID will be generated
117
 
118
  Returns:
119
  str: Name of the created JSON file
@@ -132,7 +137,7 @@ def create_json_config(
132
  }
133
 
134
  # Generate file name based on provided name or unique ID
135
- file_name = f"chai1_{name if name else hashlib.sha256(uuid4().bytes).hexdigest()[:8]}_config.json"
136
  file_path = here / "inputs/config" / file_name
137
 
138
  # Write the JSON file
@@ -142,43 +147,43 @@ def create_json_config(
142
 
143
  # Function to compute Chai1 inference
144
  def compute_Chai1(
145
- fasta_file: Optional[str] = "",
146
- inference_config_file: Optional[str] = "",
147
  ):
148
  """Compute a Chai1 simulation.
149
 
150
  Args:
151
- fasta_file (str, optional): FASTA file name containing the protein sequence.
152
  If not provided, uses the default input file.
153
- inference_config_file (str, optional): JSON configuration file name for inference.
154
  If not provided, uses the default quick inference configuration.
155
 
156
  Returns:
157
- str: Output PDB file name containing the predicted structure.
158
  """
 
159
  with app.run():
160
-
161
  force_redownload = False
162
 
163
  print("🧬 checking inference dependencies")
164
  download_inference_dependencies.remote(force=force_redownload)
165
 
166
- # Define fasta file
167
- if not fasta_file:
168
- fasta_file = here / "inputs/fasta" / "chai1_default_input.fasta"
169
- print(f"🧬 running Chai inference on {fasta_file}")
170
- fasta_file = here / "inputs/fasta" / fasta_file
171
- print(fasta_file)
172
- fasta_content = Path(fasta_file).read_text()
173
 
174
  # Define inference config file
175
- if not inference_config_file:
176
- inference_config_file = here / "inputs/config" / "chai1_quick_inference.json"
177
- inference_config_file = here / "inputs/config" / inference_config_file
178
- print(f"🧬 loading Chai inference config from {inference_config_file}")
179
- inference_config = json.loads(Path(inference_config_file).read_text())
180
 
181
- # Generate a unique run ID
182
  run_id = hashlib.sha256(uuid4().bytes).hexdigest()[:8] # short id
183
  print(f"🧬 running inference with {run_id=}")
184
 
@@ -189,24 +194,82 @@ def compute_Chai1(
189
  output_dir.mkdir(parents=True, exist_ok=True)
190
 
191
  print(f"🧬 saving results to disk locally in {output_dir}")
 
 
 
 
192
  for ii, (scores, cif) in enumerate(results):
193
- (Path(output_dir, "score") / f"{run_id}-scores.model_idx_{ii}.npz").write_bytes(scores)
194
- (Path(output_dir, "molecules") / f"{run_id}-preds.model_idx_{ii}.cif").write_text(cif)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
- best_model, max_aggregate_score = select_best_model(
197
- run_id=run_id,
198
- scores_to_print=["aggregate_score", "ptm", "iptm"],
199
- number_of_scores=len(results),
200
- results_dir=str(Path(output_dir, "score"))
201
- )
202
- # Take the last cif file and convert it to pdb
203
- cif_name = str(Path(output_dir, "molecules"))+"/"+str(run_id)+"-preds.model_idx_"+str(best_model)+".cif"
204
- pdb_name = cif_name.split('.cif')[0] + '.pdb'
205
- st = gemmi.read_structure(cif_name)
206
- st.write_minimal_pdb(pdb_name)
207
 
208
- return pdb_name
 
 
 
 
 
209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  # Create the Gradio interface
212
  reps = [{"model": 0,"style": "cartoon","color": "hydrophobicity"}]
@@ -223,100 +286,16 @@ with gr.Blocks(theme=theme) as demo:
223
 
224
  gr.Image("images/logo1.png", show_label=False,width=400)
225
 
226
- gr.Markdown(
227
- """
228
-
229
- # Stakes
230
-
231
- The industry is being deeply changed by the development of LLMs and the recent possibilities to provide them access to external tools.
232
- For years now companies are using simulation tools in order faster and reduce the development cost of a product.
233
- One of the challenge in the coming years will be to create agents that can setup, run and process simulations to faster the development of new products.
234
-
235
- # Objective
236
-
237
- This project is a first step in this creating AI agents that perform simulations on existing softwares.
238
- 1) Several domains are of major interest:
239
- - CFD (Computational Fluid Dynamics) simulations
240
- - Biology simulations (Protein Folding, Molecular Dynamics, etc.)
241
- - All applications that use neural networks
242
-
243
- --> This project is focused on the protein folding domain, but the same principles can be applied to other domains.
244
-
245
- 2) Generally, industrial computations are performed on HPC clusters, which have access to large ressources.
246
-
247
- --> The simulation need to run on a separate server
248
-
249
- 3) The LLM needs to be able to access the simulation results in order to provide a complete answer to the user.
250
-
251
- --> The simulation results need to be accessible by the LLM
252
-
253
- ## Modal
254
-
255
- Modal (https://modal.com/) is a serverless platform that provides a simple way to run any application with the latest CPU and GPU hardware.
256
-
257
- ## Chai-1 Model
258
-
259
- Chai-1 (https://www.chaidiscovery.com/blog/introducing-chai-1) is a multi-modal foundation model for molecular structure prediction that performs at the state-of-the-art across a variety of benchmarks.
260
- Chai-1 enables unified prediction of proteins, small molecules, DNA, RNA, glycosylations, and more.
261
- Chai-1 use on Modal server is an example on how to run folding simulations.
262
- Thus, it is a good choice to start with.
263
-
264
- # Instructions
265
- 1. Upload a Fasta sequence file containing the molecule sequence.
266
- 2. Click the "Run" button to start the simulation.
267
- 3. The output will be a 3D visualization of the molecule.
268
-
269
- ## Simulation parameters choice
270
- If no config or fasta files are created, default values are chosen:
271
- - chai1_default_input.fasta
272
- - chai1_quick_inference.json
273
-
274
- The files content is diplayed at the bottom of the page.
275
- The default json configuration makes the computation fast (about 2min) but results can be disappointing.
276
- Please use chai1_default_inference.json to have a wonderful protein 😃.
277
-
278
- - chai1_default_input.fasta
279
- ```
280
- >protein|name=example-of-long-protein
281
- AGSHSMRYFSTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASPRGEPRAPWVEQEGPEYWDRETQKYKRQAQTDRVSLRNLRGYYNQSEAGSHTLQWMFGCDLGPDGRLLRGYDQSAYDGKDYIALNEDLRSWTAADTAAQITQRKWEAAREAEQRRAYLEGTCVEWLRRYLENGKETLQRAEHPKTHVTHHPVSDHEATLRCWALGFYPAEITLTWQWDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPLTLRWEP
282
- >protein|name=example-of-short-protein
283
- AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPSDIEVDLLKNGERIEKVEHSDLSFSKDWSFYLLYYTEFTPTEKDEYACRVNHVTLSQPKIVKWDRDM
284
- >protein|name=example-peptide
285
- GAAL
286
- >ligand|name=example-ligand-as-smiles
287
- CCCCCCCCCCCCCC(=O)O
288
- ```
289
- - chai1_quick_inference.json
290
- ```json
291
- {
292
- "num_trunk_recycles": 1,
293
- "num_diffn_timesteps": 10,
294
- "seed": 42,
295
- "use_esm_embeddings": true
296
- "use_msa_server": false
297
- }
298
- ```
299
-
300
- # Work performed
301
- This interface allows you to run Chai1 simulations on a given Fasta sequence file.
302
- The Chai1 model is designed to predict the 3D structure of proteins based on their amino acid sequences.
303
- You can input a Fasta file containing the sequence of the molecule you want to simulate, and the output will be a 3D representation of the molecule based on the Chai1 model.
304
-
305
- You can input a Fasta file containing the sequence of the molecule you want to simulate.
306
- The output will be a 3D representation of the molecule based on the Chai1 model.
307
-
308
- # Disclaimer
309
- This interface is for educational and research purposes only. The results may vary based on the input sequence and the Chai1 model's capabilities.
310
- # Contact
311
- For any issues or questions, please contact the developer or refer to the documentation.
312
- """)
313
 
314
 
315
  with gr.Tab("Configuration 📦"):
316
 
317
  gr.Markdown(
318
  """
319
- ## Fasta file and configuration generator
320
  """)
321
 
322
  with gr.Row():
@@ -324,15 +303,15 @@ with gr.Blocks(theme=theme) as demo:
324
  slider_nb = gr.Slider(1, 500, value=300, label="Number of diffusion time steps", info="Choose the number of diffusion time steps for the simulation", step=1, interactive=True, elem_id="num_iterations")
325
  slider_trunk = gr.Slider(1, 5, value=3, label="Number of trunk recycles", info="Choose the number of iterations for the simulation", step=1, interactive=True, elem_id="trunk_number")
326
  slider_seed = gr.Slider(1, 100, value=42, label="Seed", info="Choose the seed", step=1, interactive=True, elem_id="seed")
327
- check_options = gr.CheckboxGroup(["ESM_embeddings", "MSA_server"], value=["ESM_embeddings",], label="Additionnal options", info="Options to use ESM embeddings and MSA server", elem_id="options")
328
- config_name = gr.Textbox(placeholder="Enter a name for the config (optional)", label="Configuration file name")
329
  button_json = gr.Button("Create Config file")
330
  button_json.click(fn=create_json_config, inputs=[slider_nb, slider_trunk, slider_seed, check_options, config_name], outputs=[])
331
 
332
 
333
  with gr.Column(scale=1):
334
  fasta_input = gr.Textbox(placeholder="Fasta format sequences", label="Fasta content", lines=10)
335
- fasta_name = gr.Textbox(placeholder="Enter a name for the fasta file (optional)", label="Fasta file name")
336
  fasta_button = gr.Button("Create Fasta file")
337
  fasta_button.click(fn=create_fasta_file, inputs=[fasta_input, fasta_name], outputs=[])
338
 
@@ -352,29 +331,52 @@ with gr.Blocks(theme=theme) as demo:
352
  inp1 = gr.FileExplorer(root_dir=here / "inputs/fasta",
353
  value="chai1_default_input.fasta",
354
  label="Input Fasta file",
355
- file_count='single',
356
- glob="*.fasta")
357
 
358
  with gr.Column(scale=1):
359
  inp2 = gr.FileExplorer(root_dir=here / "inputs/config",
360
  value="chai1_quick_inference.json",
361
  label="Configuration file",
362
- file_count='single',
363
- glob="*.json")
364
  btn_refresh = gr.Button("Refresh available files")
365
 
366
  # Only workaround I found to update the file explorer
367
  def update_file_explorer():
 
368
  return gr.FileExplorer(root_dir=here), gr.FileExplorer(root_dir=here)
369
  def update_file_explorer_2():
 
370
  return gr.FileExplorer(root_dir=here / "inputs/fasta"), gr.FileExplorer(root_dir=here / "inputs/config")
371
 
372
  btn_refresh.click(update_file_explorer, outputs=[inp1,inp2]).then(update_file_explorer_2, outputs=[inp1, inp2])
373
 
374
- out = Molecule3D(label="Plot the 3D Molecule", reps=reps)
375
- btn = gr.Button("Run")
376
- btn.click(fn=compute_Chai1, inputs=[inp1 , inp2], outputs=[out])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
 
 
 
 
378
 
379
  # Launch both the Gradio web interface and the MCP server
380
  if __name__ == "__main__":
 
63
 
64
  # Definition of the tools for the MCP server
65
  # Function to return a fasta file
66
+ def create_fasta_file(file_content: str, name: Optional[str] = None, seq_name: Optional[str] = None) -> str:
67
  """Create a FASTA file from a protein sequence string with a unique name.
68
 
69
  Args:
70
+ file_content (str): The content of the FASTA file required with optional line breaks
71
+ name (str, optional): FASTA file name ending with .fasta ideally. If not provided, a unique ID will be generated
72
+ seq_name (str, optional): The name/identifier for the sequence. Defaults to "protein"
73
 
74
 
75
  Returns:
76
  str: Name of the created FASTA file
77
  """
78
+ # If the file_content is empty, raise an error
79
+ if not file_content.strip():
80
+ print("Fasta file content cannot be empty so the example fasta file will be used")
81
+ file_content = ">protein|name=example-protein\nAGSHSMRYFSTSVSRPGRGEPRFIAVGYVDDTQFVRFD"
82
+
83
  # Remove any trailing/leading whitespace but preserve line breaks
84
+ lines = file_content.strip().split('\n')
85
 
86
  # Check if the first line is a FASTA header
87
  if not lines[0].startswith('>'):
88
  # If no header provided, add one
89
  if seq_name is None:
90
+ seq_name = "protein"
91
+ file_content = f">{seq_name}\n{file_content}"
92
 
93
  # Create FASTA content (preserving line breaks)
94
+ fasta_content = file_content
95
 
96
  # Generate a unique file name
97
  unique_id = hashlib.sha256(uuid4().bytes).hexdigest()[:8]
98
+ file_name = f"{name if name else unique_id}"
99
  file_path = here / "inputs/fasta" / file_name
100
 
101
  # Write the FASTA file
 
118
  num_trunk_recycles (int): Number of trunk recycles from slider
119
  seed (int): Random seed from slider
120
  options (list): List of selected options from checkbox group
121
+ name (str, optional): JSON config file name ending with .json ideally. If not provided, a unique ID will be generated
122
 
123
  Returns:
124
  str: Name of the created JSON file
 
137
  }
138
 
139
  # Generate file name based on provided name or unique ID
140
+ file_name = f"{name if name else hashlib.sha256(uuid4().bytes).hexdigest()[:8]}"
141
  file_path = here / "inputs/config" / file_name
142
 
143
  # Write the JSON file
 
147
 
148
  # Function to compute Chai1 inference
149
  def compute_Chai1(
150
+ fasta_file_name: Optional[str] = "",
151
+ inference_config_file_name: Optional[str] = "",
152
  ):
153
  """Compute a Chai1 simulation.
154
 
155
  Args:
156
+ fasta_file_name (str, optional): FASTA file name to use for the Chai1 simulation.
157
  If not provided, uses the default input file.
158
+ inference_config_file_name (str, optional): JSON configuration file name for inference.
159
  If not provided, uses the default quick inference configuration.
160
 
161
  Returns:
162
+ pd.DataFrame: DataFrame containing model scores and CIF file paths
163
  """
164
+ import pandas as pd
165
  with app.run():
 
166
  force_redownload = False
167
 
168
  print("🧬 checking inference dependencies")
169
  download_inference_dependencies.remote(force=force_redownload)
170
 
171
+ # Define fasta file
172
+ if not fasta_file_name:
173
+ fasta_file_name = here / "inputs/fasta" / "chai1_default_input.fasta"
174
+ print(f"🧬 running Chai inference on {fasta_file_name}")
175
+ fasta_file_name = here / "inputs/fasta" / fasta_file_name
176
+ print(fasta_file_name)
177
+ fasta_content = Path(fasta_file_name).read_text()
178
 
179
  # Define inference config file
180
+ if not inference_config_file_name:
181
+ inference_config_file_name = here / "inputs/config" / "chai1_quick_inference.json"
182
+ inference_config_file_name = here / "inputs/config" / inference_config_file_name
183
+ print(f"🧬 loading Chai inference config from {inference_config_file_name}")
184
+ inference_config = json.loads(Path(inference_config_file_name).read_text())
185
 
186
+ # Generate a unique run ID
187
  run_id = hashlib.sha256(uuid4().bytes).hexdigest()[:8] # short id
188
  print(f"🧬 running inference with {run_id=}")
189
 
 
194
  output_dir.mkdir(parents=True, exist_ok=True)
195
 
196
  print(f"🧬 saving results to disk locally in {output_dir}")
197
+
198
+ # Create lists to store data for DataFrame
199
+ model_data = []
200
+
201
  for ii, (scores, cif) in enumerate(results):
202
+ score_file = Path(output_dir, "score") / f"{run_id}-scores.model_idx_{ii}.npz"
203
+ cif_file = Path(output_dir, "molecules") / f"{run_id}-preds.model_idx_{ii}.cif"
204
+
205
+ score_file.write_bytes(scores)
206
+ cif_file.write_text(cif)
207
+
208
+ # Load score data
209
+ data = load(str(score_file))
210
+
211
+ if not data["has_inter_chain_clashes"][0]:
212
+ model_data.append({
213
+ "Model Index": ii,
214
+ "Aggregate Score": float(data["aggregate_score"][0]),
215
+ "PTM": float(data["ptm"][0]),
216
+ "IPTM": float(data["iptm"][0]),
217
+ "CIF File": str(cif_file).split("/")[-1], # Get just the file name
218
+ })
219
 
220
+ # Create DataFrame from collected data
221
+ results_df = pd.DataFrame(model_data).sort_values("Aggregate Score", ascending=False)
 
 
 
 
 
 
 
 
 
222
 
223
+ return results_df
224
+
225
+
226
+ # Function to plot the 3D protein structure
227
+ def plot_protein(result_df) -> str:
228
+ """Plot the 3D structure of a protein using the DataFrame from compute_Chai1.
229
 
230
+ Args:
231
+ result_df (pd.DataFrame): DataFrame containing model information and scores
232
+
233
+ Returns:
234
+ str: Path to the generated PDB file of the best model.
235
+ """
236
+ if result_df.empty:
237
+ return "" # Return empty string instead of None for type safety
238
+
239
+ # Get the CIF file path of the model with highest aggregate score (already sorted)
240
+ best_cif = str(Path("results/molecules") / result_df.iloc[0]["CIF File"])
241
+
242
+ # Generate PDB file name
243
+ pdb_file = best_cif.replace('.cif', '.pdb')
244
+
245
+ # Convert CIF to PDB if it doesn't exist
246
+ if not Path(pdb_file).exists():
247
+ st = gemmi.read_structure(best_cif)
248
+ st.write_minimal_pdb(pdb_file)
249
+
250
+ return pdb_file
251
+
252
+ # Function to plot a CIF file
253
+ def show_cif_file(cif_file):
254
+ """Plot a 3D structure from a CIF file with the Molecule3D library.
255
+
256
+ Args:
257
+ cif_file: A protein structure file in CIF format. This can be a file uploaded by the user.
258
+ If None, the function will return None.
259
+
260
+ Returns:
261
+ str or None: PDB file name if successful, None if no file was provided
262
+ or if conversion failed.
263
+ """
264
+ if not cif_file:
265
+ return None
266
+
267
+ cif_path = Path(cif_file.name)
268
+ st = gemmi.read_structure(str(cif_path))
269
+ pdb_file = cif_path.with_suffix('.pdb')
270
+ st.write_minimal_pdb(str(pdb_file)) # Convert PosixPath to string
271
+
272
+ return str(pdb_file)
273
 
274
  # Create the Gradio interface
275
  reps = [{"model": 0,"style": "cartoon","color": "hydrophobicity"}]
 
286
 
287
  gr.Image("images/logo1.png", show_label=False,width=400)
288
 
289
+ with open("introduction_page.md", "r") as f:
290
+ intro_md = f.read()
291
+ gr.Markdown(intro_md)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
 
294
  with gr.Tab("Configuration 📦"):
295
 
296
  gr.Markdown(
297
  """
298
+ ## Fasta file and configuration generator (optional)
299
  """)
300
 
301
  with gr.Row():
 
303
  slider_nb = gr.Slider(1, 500, value=300, label="Number of diffusion time steps", info="Choose the number of diffusion time steps for the simulation", step=1, interactive=True, elem_id="num_iterations")
304
  slider_trunk = gr.Slider(1, 5, value=3, label="Number of trunk recycles", info="Choose the number of iterations for the simulation", step=1, interactive=True, elem_id="trunk_number")
305
  slider_seed = gr.Slider(1, 100, value=42, label="Seed", info="Choose the seed", step=1, interactive=True, elem_id="seed")
306
+ check_options = gr.CheckboxGroup(["ESM_embeddings", "MSA_server"], value=["ESM_embeddings",], label="Additional options", info="Options to use ESM embeddings and MSA server", elem_id="options")
307
+ config_name = gr.Textbox(placeholder="Enter a name for the json file (optional)", label="JSON file name")
308
  button_json = gr.Button("Create Config file")
309
  button_json.click(fn=create_json_config, inputs=[slider_nb, slider_trunk, slider_seed, check_options, config_name], outputs=[])
310
 
311
 
312
  with gr.Column(scale=1):
313
  fasta_input = gr.Textbox(placeholder="Fasta format sequences", label="Fasta content", lines=10)
314
+ fasta_name = gr.Textbox(placeholder="Enter the name of the fasta file name (optional)", label="Fasta file name")
315
  fasta_button = gr.Button("Create Fasta file")
316
  fasta_button.click(fn=create_fasta_file, inputs=[fasta_input, fasta_name], outputs=[])
317
 
 
331
  inp1 = gr.FileExplorer(root_dir=here / "inputs/fasta",
332
  value="chai1_default_input.fasta",
333
  label="Input Fasta file",
334
+ file_count='single')
 
335
 
336
  with gr.Column(scale=1):
337
  inp2 = gr.FileExplorer(root_dir=here / "inputs/config",
338
  value="chai1_quick_inference.json",
339
  label="Configuration file",
340
+ file_count='single')
 
341
  btn_refresh = gr.Button("Refresh available files")
342
 
343
  # Only workaround I found to update the file explorer
344
  def update_file_explorer():
345
+ """Don't need to be used by LLMs, but useful for the interface to update the file explorer"""
346
  return gr.FileExplorer(root_dir=here), gr.FileExplorer(root_dir=here)
347
  def update_file_explorer_2():
348
+ """Don't need to be used by LLMs, but useful for the interface to update the file explorer"""
349
  return gr.FileExplorer(root_dir=here / "inputs/fasta"), gr.FileExplorer(root_dir=here / "inputs/config")
350
 
351
  btn_refresh.click(update_file_explorer, outputs=[inp1,inp2]).then(update_file_explorer_2, outputs=[inp1, inp2])
352
 
353
+ #out = Molecule3D(label="Plot the 3D Molecule", reps=reps)
354
+ out = gr.DataFrame(
355
+ headers=["Model Index", "Aggregate Score", "PTM", "IPTM", "CIF File"],
356
+ datatype=["number", "number", "number", "number", "str"],
357
+ label="Inference Results sorted by Aggregate Score",
358
+ visible=True,
359
+ )
360
+ out2 = Molecule3D(label="Plot the 3D Molecule", reps=reps)
361
+
362
+ btn = gr.Button("Run Simulation")
363
+ btn.click(fn=compute_Chai1, inputs=[inp1 , inp2], outputs=[out]).then(
364
+ fn=plot_protein,
365
+ inputs=out,
366
+ outputs=out2
367
+ )
368
+
369
+
370
+ with gr.Tab("Show protein from a cif file 💻"):
371
+
372
+ gr.Markdown(
373
+ """
374
+ ## Plot a 3D structure from a CIF file
375
+ """)
376
 
377
+ cif_input = gr.File(label="Input CIF file", file_count='single')
378
+ cif_output = Molecule3D(label="Plot the 3D Molecule", reps=reps)
379
+ cif_input.change(fn=show_cif_file, inputs=cif_input, outputs=cif_output)
380
 
381
  # Launch both the Gradio web interface and the MCP server
382
  if __name__ == "__main__":
introduction_page.md ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <style>
2
+ code[class*="language-bash"], pre[class*="language-bash"] {
3
+ background: #fff !important;
4
+ }
5
+ </style>
6
+
7
+ # Stakes
8
+
9
+ The industry is being deeply changed by the development of LLMs and the recent possibilities to provide them access to external tools. For years, companies have used simulation tools to accelerate and reduce the cost of product development. One of the main challenges in the coming years will be to create agents that can set up, run, and process simulations to further accelerate innovation.
10
+
11
+ # Objective
12
+
13
+ This project is a first step in creating AI agents that perform simulations on existing software. Key domains include:
14
+ - **CFD** (Computational Fluid Dynamics) simulations
15
+ - **Biology** (Protein Folding, Molecular Dynamics, etc.)
16
+ - **Neural network applications**
17
+
18
+ This project focuses on protein folding, but the same principles can be applied to other domains. In particular it uses [Chai-1](https://www.chaidiscovery.com/blog/introducing-chai-1), which is a multi-modal foundation model for molecular structure prediction, performing at state-of-the-art levels across a variety of benchmarks. Chai-1 enables unified prediction of proteins, small molecules, DNA, RNA, glycosylations, and more. Using Chai-1 on Modal is a great example of running folding simulations.
19
+
20
+ Industrial computations are often performed on HPC clusters with large resources, so simulations typically run on separate servers. The LLM must be able to access simulation results to provide complete answers to users. To this end, this project uses [Modal](https://modal.com/), a serverless platform that provides a simple way to run any application with the latest CPU and GPU hardware.
21
+
22
+ [Demonstration](https://www.youtube.com/watch?v=P9cAKxJ9Zh8)
23
+
24
+ ---
25
+
26
+ # Instructions
27
+
28
+ <div style="background-color:#f5f5f5; border-radius:8px; padding:18px 24px; margin-bottom:24px; border:1px solid #cccccc;">
29
+
30
+ ### 1. <span style="color:#2563eb;">(Optional) Create your JSON configuration file</span>
31
+ <small>Default configuration is available if you skip this step.</small>
32
+
33
+ - Set your simulation parameters and generate the JSON config file. A unique identifier will be assigned (e.g., `chai_{run_id}_config.json`).
34
+ - **Parameters:**
35
+ - <b>Number of diffusion time steps:</b> 1 to 500
36
+ - <b>Number of trunk recycles:</b> 1 to 5
37
+ - <b>Seed:</b> 1 to 100
38
+ - <b>ESM_embeddings:</b> Include or not
39
+ - <b>MSA_server:</b> Include or not
40
+
41
+ ### 2. <span style="color:#2563eb;">(Optional) Upload a FASTA file with your molecule sequence</span>
42
+ <small>Default FASTA files are available if you skip this step.</small>
43
+
44
+ - Write your FASTA content and create the file. A unique identifier will be assigned (e.g., `chai_{run_id}_input.fasta`).
45
+ - <b style="color:#b91c1c;">Warning:</b> The header must be well formatted for Chai1 to process it.
46
+
47
+ **FASTA template:**
48
+ ```fasta
49
+ >{molecule_type}|{molecule_name}
50
+ Sequence (for protein/RNA/DNA) or SMILES for ligand
51
+ ```
52
+
53
+ **Accepted molecule types:**
54
+ `protein` / `rna` / `dna` / `ligand`
55
+
56
+ **Default input (provided by Chai1):**
57
+ ```fasta
58
+ >protein|name=example-of-long-protein
59
+ AGSHSMRYFSTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASPRGEPRAPWVEQEGPEYWDRETQKYKRQAQTDRVSLRNLRGYYNQSEAGSHTLQWMFGCDLGPDGRLLRGYDQSAYDGKDYIALNEDLRSWTAADTAAQITQRKWEAAREAEQRRAYLEGTCVEWLRRYLENGKETLQRAEHPKTHVTHHPVSDHEATLRCWALGFYPAEITLTWQWDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPLTLRWEP
60
+
61
+ >protein|name=example-of-short-protein
62
+ AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPSDIEVDLLKNGERIEKVEHSDLSFSKDWSFYLLYYTEFTPTEKDEYACRVNHVTLSQPKIVKWDRDM
63
+
64
+ >protein|name=example-peptide
65
+ GAAL
66
+
67
+ >ligand|name=example-ligand-as-smiles
68
+ CCCCCCCCCCCCCC(=O)O
69
+ ```
70
+ <small>For a peptide, use `protein` as the molecule type.</small>
71
+
72
+ **Other example:**
73
+ ```fasta
74
+ >protein|lysozyme
75
+ MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPDLNAAKSELDKAIGRNCNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRCAAINQVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPDRAKRVITTFRTGTWDAYKNL
76
+ ```
77
+ ### 3. <span style="color:#2563eb;">Select your config and FASTA files</span>
78
+ <small>Files are stored in your working directory as you create them.</small>
79
+
80
+ ### 4. <span style="color:#2563eb;">Click the "Run Simulation" button to start the simulation</span>
81
+
82
+ ### 5. <span style="color:#2563eb;">View the 3D visualization of your molecule</span>
83
+ </div>
84
+
85
+ ## Simulation parameters choice
86
+ If no config or fasta files are created, default values are chosen:
87
+ - chai1_default_input.fasta
88
+ - chai1_quick_inference.json
89
+
90
+ The content of these files is displayed at the bottom of the page.
91
+ The default json configuration makes the computation fast (about 2min) but results can be disappointing.
92
+ Please use chai1_default_inference.json to have a wonderful protein 😃.
93
+
94
+ - chai1_quick_inference.json
95
+ ```json
96
+ {
97
+ "num_trunk_recycles": 1,
98
+ "num_diffn_timesteps": 10,
99
+ "seed": 42,
100
+ "use_esm_embeddings": true,
101
+ "use_msa_server": false
102
+ }
103
+ ```
104
+
105
+ # Contact
106
+ For any issues or questions, please contact the developer or refer to the documentation.