# Spaces: Sleeping (hosting-page status text captured with the file; not part of the script)
"""Streamlit page that converts an uploaded chemical file into a matrix.

The page accepts a SMILES or SDF upload, parses it with RDKit, shows the
parsed molecules, and builds either a descriptor or a fingerprint matrix
that can be previewed and downloaded as CSV.
"""
import os
import sys
import tempfile

import streamlit as st
from rdkit import Chem
from rdkit.Chem import Draw

# Add the streamlit_app directory (and its parent) to the Python path so the
# project-local imports below resolve when this page is run directly.
current_dir = os.path.dirname(os.path.abspath(__file__))
module_path = os.path.abspath(os.path.join(current_dir, '..', '..'))
parent_dir = os.path.join(current_dir, "..")
# Streamlit re-executes the script on every interaction, so only add each
# entry when it is missing instead of growing sys.path on every rerun.
for extra_path in (module_path, parent_dir):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

# set_page_config is not defined in this file; it is expected to come from
# this wildcard import.
from utils import *  # noqa: E402,F401,F403
from streamlit_app.features import DescriptorGenerator, FingerprintGenerator  # noqa: E402


def _read_smiles(raw: bytes) -> list:
    """Parse one SMILES string per non-blank line of ``raw``.

    Args:
        raw: UTF-8 encoded file content, one SMILES entry per line.

    Returns:
        The ``Chem.MolFromSmiles`` result for every non-blank line, in file
        order. Results are not filtered here; the caller drops ``None``.
    """
    stripped_lines = (line.strip() for line in raw.decode("utf-8").splitlines())
    # Blank lines are skipped so they are never handed to the parser.
    return [Chem.MolFromSmiles(smiles) for smiles in stripped_lines if smiles]


def _read_sdf(raw: bytes) -> list:
    """Read the molecules contained in SDF content ``raw``.

    The content is written to a temporary ``.sdf`` file because
    ``Chem.SDMolSupplier`` is given a file path. The temporary file is always
    removed before returning, including when reading fails.

    Args:
        raw: The bytes of the uploaded SDF file.

    Returns:
        The non-``None`` molecules yielded by the supplier, in file order.
    """
    temp_sdf_path = None
    supplier = None
    try:
        # delete=False: the file is reopened by path after this handle closes.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".sdf") as temp_sdf:
            temp_sdf_path = temp_sdf.name
            temp_sdf.write(raw)
        supplier = Chem.SDMolSupplier(temp_sdf_path)
        return [mol for mol in supplier if mol is not None]
    finally:
        # Drop our reference to the supplier before removing the file it reads.
        supplier = None
        if temp_sdf_path and os.path.exists(temp_sdf_path):
            os.remove(temp_sdf_path)


# Set up the page configuration
set_page_config(
    page_title="Chem Converter",
    page_icon=os.path.join(parent_dir, "assets", "QC-Devs.png"),
)

st.title("Chemical File Converter")

# Description of the page
st.markdown("""
This page allows you to upload raw chemical file formats such as SMILES or SDF,
and convert them into chemical matrices that can be used as input for selector's various algorithms.
""")

# File uploader for chemical file
chemical_file = st.file_uploader(
    "Upload a chemical file (e.g., SMILES, SDF, or TXT)",
    type=["txt", "smi", "sdf"],
)

if chemical_file:
    # User selects the file format; the empty first option means "not chosen yet".
    file_format = st.selectbox(
        "Select the format of the provided file",
        options=["", "SMILES", "SDF"],
    )

    if file_format:
        # getvalue() returns the whole upload regardless of the current
        # stream position, unlike read().
        file_bytes = chemical_file.getvalue()

        # Process the chemical file based on user selection
        molecules = []
        if file_format == "SMILES":
            molecules = _read_smiles(file_bytes)
        elif file_format == "SDF":
            molecules = _read_sdf(file_bytes)

        # Check for valid molecules (unparsable entries are None)
        valid_molecules = [mol for mol in molecules if mol is not None]

        if not valid_molecules:
            st.error("No valid molecules found in the uploaded file.")
        else:
            st.success(f"Successfully loaded {len(valid_molecules)} valid molecules.")

            # Display the molecules
            img = Draw.MolsToImage(valid_molecules)
            st.image(img, caption="Molecules in the file")

            # Choose the type of matrix to generate
            matrix_type = st.selectbox("Choose matrix type", ["Descriptors", "Fingerprints"])

            if matrix_type == "Descriptors":
                # Allow the user to choose the type of descriptors to generate
                use_fragment = st.checkbox(
                    "Whether return value includes the fragment binary descriptors",
                    value=True,
                )
                ipc_avg = st.checkbox("Whether IPC descriptor calculates with avg", value=True)

                descriptor_generator = DescriptorGenerator(valid_molecules)
                matrix = descriptor_generator.rdkit_desc(use_fragment, ipc_avg)

            elif matrix_type == "Fingerprints":
                # Allow user to choose the type of fingerprint to generate
                fp_type = st.selectbox("Select Fingerprint Type", options=["SECFP", "ECFP", "Morgan"])
                n_bits = st.number_input("Number of bits for the fingerprint", min_value=1, value=2048)
                radius = st.number_input(
                    "The maximum radius of the substructure that is generated at each atom",
                    min_value=1,
                    value=3,
                )
                min_radius = st.number_input(
                    "The minimum radius that is used to extract n-grams",
                    min_value=1,
                    value=3,
                )
                random_seed = st.number_input(
                    "Random seed for fingerprint generation",
                    min_value=0,
                    value=12345,
                )
                rings = st.checkbox(
                    "Whether the rings (SSSR) are extracted from the molecule and added to the shingling",
                    value=True,
                )
                isomeric = st.checkbox("Whether the SMILES added to the shingling are isomeric", value=True)
                kekulize = st.checkbox("Whether the SMILES added to the shingling are kekulized", value=False)

                fp_generator = FingerprintGenerator(valid_molecules)
                # Forward every option chosen above so the widgets actually
                # influence the generated fingerprints.
                # NOTE(review): the keyword names assume compute_fingerprint's
                # parameters are named like these widget variables — confirm
                # against streamlit_app.features.
                matrix = fp_generator.compute_fingerprint(
                    fp_type=fp_type,
                    n_bits=n_bits,
                    radius=radius,
                    min_radius=min_radius,
                    random_seed=random_seed,
                    rings=rings,
                    isomeric=isomeric,
                    kekulize=kekulize,
                )

            st.write("Generated Chemical Matrix:")
            st.dataframe(matrix)

            # Option to download the matrix as CSV
            csv_data = matrix.to_csv().encode('utf-8')
            st.download_button(
                "Download Chemical Matrix as CSV",
                data=csv_data,
                file_name="chemical_matrix.csv",
                mime="text/csv",
            )