| import re | |
| from rdkit import Chem | |
| from rdkit.Chem import MolFromSmiles, SDWriter | |
| import logging | |
| from Bio import SeqIO | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
def process_smiles(smiles: str, sdf_file: str = "/tmp/output.sdf") -> str:
    """Parse a SMILES string and write the resulting molecule to an SDF file.

    Args:
        smiles: SMILES text handed to RDKit's ``MolFromSmiles``.
        sdf_file: Destination path of the SDF file. Defaults to the
            historical fixed location, so existing callers are unaffected;
            pass a distinct path to avoid overwriting a previous result.

    Returns:
        The path of the SDF file that was written (``sdf_file``).

    Raises:
        ValueError: If ``MolFromSmiles`` cannot parse ``smiles``.
    """
    mol = MolFromSmiles(smiles)
    # Explicit None check, consistent with is_valid_smiles(): parsing
    # failure is signalled by a None return.
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    writer = SDWriter(sdf_file)
    try:
        writer.write(mol)
    finally:
        # Always release the file handle, even when writing fails.
        writer.close()
    return sdf_file
def process_pdb(file_path: str) -> str:
    """Return the sequences found in a PDB file as one space-separated string.

    The file is read with Biopython's ``SeqIO.parse`` using the
    ``"pdb-seqres"`` format; the sequence of every record the parser yields
    is converted to ``str`` and the pieces are joined with single spaces.

    Args:
        file_path: Path of the PDB file to read.

    Returns:
        All record sequences joined by a space (empty string if the parser
        yields no records).
    """
    with open(file_path, "r") as pdb_handle:
        chain_sequences = [
            str(entry.seq) for entry in SeqIO.parse(pdb_handle, "pdb-seqres")
        ]
    return " ".join(chain_sequences)
def process_sdf(file_path: str) -> str:
    """Return the given SDF path unchanged.

    No reading or validation is performed here; the argument is handed back
    exactly as received.
    """
    sdf_path = file_path
    return sdf_path
# Heuristic SMILES matcher: one character that is not "J", followed by at
# least six characters drawn from a set of SMILES symbols.
_SMILES_PATTERN = re.compile(
    r"([^J][0-9BCOHNSOPrIFla@+\-\[\]\(\)\\\/%=#$]{6,})"
)


def extract_smiles(text: str) -> str:
    """Return the first SMILES-looking token found in ``text``.

    The search is purely pattern based; no chemical validation happens here.

    Args:
        text: Free text that may contain a SMILES string.

    Returns:
        The first matching token with surrounding whitespace removed, or an
        empty string when nothing matches.
    """
    found = _SMILES_PATTERN.search(text)
    if found is None:
        return ""
    # The leading ``[^J]`` atom accepts any character, so when the SMILES is
    # embedded in a sentence the match begins with the separating space or
    # newline. Strip it so callers receive the bare token.
    return found.group(1).strip()
def is_valid_smiles(smiles: str) -> bool:
    """Report whether RDKit's ``MolFromSmiles`` can parse ``smiles``.

    Returns:
        ``True`` when parsing yields a molecule object, ``False`` when it
        yields ``None``.
    """
    return MolFromSmiles(smiles) is not None
def extract_and_convert_to_sdf(text: str) -> str:
    """Find a SMILES string in ``text`` and convert it to an SDF file.

    Args:
        text: Free text searched with ``extract_smiles``.

    Returns:
        The path returned by ``process_smiles`` for the extracted SMILES.

    Raises:
        ValueError: If no candidate is found or the candidate fails
            ``is_valid_smiles``.
    """
    candidate = extract_smiles(text)
    # Guard clause: bail out when nothing was extracted or it does not parse.
    if not candidate or not is_valid_smiles(candidate):
        raise ValueError("No valid SMILES string found in the text.")
    return process_smiles(candidate)