Spaces:

cafierom
/

MoDrAg2

Sleeping

App Files Files Community

MoDrAg2 / modrag_task_graphs.py

cafierom

Upload main code files

baeba65 verified 9 days ago

raw

history blame contribute delete

4.32 kB

	from modrag_protein_functions import uniprot_node, listbioactives_node, getbioactives_node, predict_node, docking_node
	from modrag_molecule_functions import smiles_node

	def get_actives_for_protein(query_protein: str):
	    '''
	    Finds bioactive molecules for a given protein. Uses Uniprot to find accession
	    codes for the protein, looks up the chembl IDs for those accession codes, picks
	    the chembl ID with the most bioactive molecules and fetches its bioactives.

	    Args:
	        query_protein: The protein to search for.

	    Returns:
	        A 3-tuple, on every path:
	        bioacts: The first value returned by getbioactives_node for the chosen
	            chembl ID, or an empty list when no chembl ID has any bioactives.
	        bioactives_for_protein_string: A string describing each step taken and the
	            bioactive molecules found, or 'No bioactives found for protein'.
	        bioacts_images: The third value returned by getbioactives_node, or None
	            when no bioactives were found.
	    '''

	    bioactives_for_protein_string = ''

	    # find UP accession codes for protein; only the first return value is used here
	    up_ac_list, _, _ = uniprot_node([query_protein])

	    bioactives_for_protein_string += 'Found the following Uniprot ACs: \n'
	    for up_ac in up_ac_list[0]:
	        bioactives_for_protein_string += up_ac + ', \n'

	    # find chembl IDs for each accession code
	    bioacts, _, chembl_ids = listbioactives_node(up_ac_list[0])

	    bioactives_for_protein_string += 'Found the following chembl IDs: \n'

	    # check for chembl IDs with bioactives, keeping the one with the most
	    # (strict '>' means the first ID wins on a tie)
	    largest = 0
	    largest_id = None
	    for bioact_num, chemblid in zip(bioacts[0], chembl_ids[0]):
	        bioactives_for_protein_string += f'{chemblid}: {bioact_num} bioactive molecules. \n'
	        if bioact_num > largest:
	            largest = bioact_num
	            largest_id = chemblid

	    if largest == 0:
	        # return the same 3-tuple shape as the success path so callers can
	        # always unpack three values
	        return [], 'No bioactives found for protein', None

	    bioactives_for_protein_string += f'Chose the Chembl ID {largest_id} with {largest} bioactive molecules. \n'
	    # get list of bioactives for best chembl_id
	    bioacts, bioacts_string, bioacts_images = getbioactives_node([largest_id])

	    bioactives_for_protein_string += bioacts_string

	    return bioacts, bioactives_for_protein_string, bioacts_images

	def get_predictions_for_protein(smiles_list: list[str], query_protein: str):
	    '''
	    Predicts the activity of the given SMILES against a protein.

	    Uses Uniprot to find chembl IDs for the protein, and then queries chembl for
	    bioactive molecules to train a model and predict the activity of the given smiles.
	    The chembl ID with the most bioactive molecules is the one used.

	    Args:
	        smiles_list: A list of SMILES strings.
	        query_protein: The protein to search for.

	    Returns:
	        A 3-tuple, on every path:
	        preds: The first value returned by predict_node for the given SMILES, or
	            an empty list when no chembl ID has any bioactives.
	        predictions_string: A string describing each step taken followed by the
	            text returned by predict_node, or 'No bioactives found for protein'.
	        None: Placeholder third element; this function produces no images.
	    '''

	    predictions_string = ''

	    # find UP accession codes for protein; only the first return value is used here
	    up_ac_list, _, _ = uniprot_node([query_protein])

	    predictions_string += 'Found the following Uniprot ACs: \n'
	    for up_ac in up_ac_list[0]:
	        predictions_string += up_ac + ', \n'

	    # find chembl IDs for each accession code
	    bioacts, _, chembl_ids = listbioactives_node(up_ac_list[0])

	    predictions_string += 'Found the following chembl IDs: \n'

	    # check for chembl IDs with bioactives, keeping the one with the most
	    # (strict '>' means the first ID wins on a tie)
	    largest = 0
	    largest_id = None
	    for bioact_num, chemblid in zip(bioacts[0], chembl_ids[0]):
	        predictions_string += f'{chemblid}: {bioact_num} bioactive molecules. \n'
	        if bioact_num > largest:
	            largest = bioact_num
	            largest_id = chemblid

	    if largest == 0:
	        # return the same 3-tuple shape as the success path so callers can
	        # always unpack three values
	        return [], 'No bioactives found for protein', None

	    predictions_string += f'Chose the Chembl ID {largest_id} with {largest} bioactive molecules. \n'
	    # train the model on the chembl ID and then predict
	    preds, preds_string, _ = predict_node(smiles_list, largest_id)

	    predictions_string += preds_string

	    return preds, predictions_string, None

	def dock_from_names(names_list: list[str], protein: str):
	    '''
	    Looks up the SMILES for each named molecule and docks them in a given protein.

	    Args:
	        names_list: A list of names of molecules.
	        protein: The protein to dock in.

	    Returns:
	        scores: The first value returned by docking_node for the molecules.
	        dock_from_names_string: A string listing the SMILES found for each name,
	            followed by the docking score for each name.
	        None: Placeholder third element; this function produces no images.
	    '''
	    # get SMILES for names (the other two return values are not needed here)
	    smiles_list, _, _ = smiles_node(names_list)

	    report_lines = [
	        f'The SMILES for {mol_name} is {mol_smiles}. \n'
	        for mol_smiles, mol_name in zip(smiles_list, names_list)
	    ]

	    # send for docking
	    scores, _, _ = docking_node(smiles_list, protein)

	    report_lines.extend(
	        f'The docking score for {mol_name} is {mol_score}. \n'
	        for mol_score, mol_name in zip(scores, names_list)
	    )

	    return scores, ''.join(report_lines), None