Spaces:

mishtert
/

tracer

Runtime error

App Files Files Community

mishtert commited on Sep 8, 2022

Commit

32da6be

1 Parent(s): ac27aa9

Upload dtxutils.py

Browse files

Files changed (1) hide show

dtxutils.py +343 -0

dtxutils.py ADDED Viewed

	@@ -0,0 +1,343 @@

+from utils.pharmap_utils.meshutils import nct_to_mesh_term, mesh_term_to_id, df_mesh, df_mesh_ct
+from utils.pharmap_utils.cid import CaseInsensitiveDict
+from utils.pharmap_utils.dictutils import *
+import re
+import streamlit as st
+# mesh list extract
+def meshtrm_lst_xtract(nct_value):
+	try:
+		mesh_term = nct_to_mesh_term[nct_value]
+		mesh_term_list = list(mesh_term)
+		return mesh_term_list
+	except:
+		pass
+@st.cache(suppress_st_warning=True, allow_output_mutation=True)
+# type extract fun
+def type_extract(mesh_term_list):
+	mesh_term_list = [mesh_term_list] if isinstance(mesh_term_list, str) else mesh_term_list
+	# print('mesh_term_list: ',mesh_term_list)
+	# l2_map_lst=[]
+	uid_lst = []
+	if mesh_term_list is not None:
+		for val in mesh_term_list:
+			# print('value inside uid forloop:',val)
+			try:
+				# print('Inside get uid')
+				uid = mesh_term_to_id[val]
+				uid_lst.append(uid)
+				# print(uid_lst)
+				if uid_lst is None:
+					uid_lst = []
+			except:
+				pass
+				# print('error in get uid list')
+				# get mesh num
+		mesh_num_xtract_lst = []
+		for val in uid_lst:
+			try:
+				# print('Inside get mesh num')
+				mesh_num_xtract = df_mesh.loc[df_mesh['ui'] == val, 'mesh_number'].iloc[0]
+				mesh_num_xtract_lst.append(mesh_num_xtract)
+				# print(mesh_num_xtract_lst)
+				if ',' in mesh_num_xtract_lst[0]:
+					mesh_num_xtract_lst = mesh_num_xtract_lst[0].split(", ")
+					# print('mesh_num_xtract_lst after spltting',mesh_num_xtract_lst)
+			except:
+				pass
+				# print('error in get mesh num')
+		# mesh number extract l2
+		l2_map_lst = []
+		for val in mesh_num_xtract_lst:
+			# print('Inside l2map for loop',val)
+			search_value = val[:3]
+			# print('printing search value:',search_value)
+			try:
+				l2_map = df_mesh.loc[df_mesh['mesh_number'] == search_value, 'name'].iloc[0]
+				# print(l2_map)
+				l2_map_lst.append(l2_map)
+				# print(l2_map_lst)
+				if l2_map_lst is None:
+					l2_map_lst = []
+			except:
+				pass
+		l2_map_lst = list(set(l2_map_lst))
+		# print('finaloutput',l2_map_lst)
+		return l2_map_lst
+def split_values(col_val):
+	# """split words seperated by special characters"""
+	# print(col_val)
+	if col_val != '':
+		char_list = ['|', ',', '/', '.', ';', './', ',/', '/ ', ' /']
+		# res = ' '.join([ele for ele in char_list if(ele in col_val)])
+		res = [ele for ele in char_list if (ele in col_val)]
+		# print('printing string of found char',res)
+		colstring = str(col_val)
+		f_res = []
+		try:
+			while len(res) > 0:
+				res = res[-1]
+				f_res = colstring.split(''.join(res))
+				# print(f_res)
+				# return f_res
+				f_res = [x for x in f_res if x is not None]
+				return ', '.join(f_res)
+		except:
+			pass
+		else:
+			return col_val
+def map_entry_terms(myText):
+	obj = CaseInsensitiveDict(entry_dict)
+	pattern = re.compile(r'(?<!\w)(' + '|'.join(re.escape(key) for key in obj.keys()) + r')(?!\w)', flags=re.IGNORECASE)
+	text = pattern.sub(lambda x: obj[x.group()], myText)
+	# text = pattern.sub(lambda x: obj[x.group()], text)
+	return text.strip().split('/')
+def remove_none(some_list):
+	some_list = [some_list] if isinstance(some_list, str) else some_list
+	if some_list is not None:
+		some_list = list(filter(lambda x: x != None, some_list))
+		return some_list
+def retain_all_ta(some_list):
+	some_list = [some_list] if isinstance(some_list, str) else some_list
+	# some_list.split(',')
+	value = 'all_ta'
+	#   print(value)
+	if some_list is not None:
+		if value in some_list:
+			some_list = [value]
+			return some_list
+		else:
+			return some_list
+def unique_list(l):
+	l = map(str.strip, l)  # remove whitespace from list element
+	# print(l)
+	ulist = []
+	[ulist.append(x) for x in l if x not in ulist]
+	return ulist
+def split_for_type_extract(my_list, char):
+	# print('entering the function:',my_list)
+	try:
+		my_list = [my_list] if isinstance(my_list, str) else my_list
+		if my_list is not None:
+			# print(my_list)
+			my_list = list(map(lambda x: x.split(char)[0], my_list))
+			# my_list = [x for x in my_list if x is not None]
+			return my_list
+	except:
+		pass
+def special_ask(col_value):
+	col_value = col_value.lower()
+	if col_value == 'obesity':
+		ta_list = 'met'
+		return ta_list.split()
+	elif col_value == 'healthy subject':
+		ta_list = 'all_ta'
+		return ta_list.split()
+	elif col_value == 'healthy subjects':
+		ta_list = 'all_ta'
+		return ta_list.split()
+	elif col_value == 'healthy participants':
+		ta_list = 'all_ta'
+		return ta_list.split()
+	elif col_value == 'healthy participant':
+		ta_list = 'all_ta'
+		return ta_list.split()
+	elif col_value == 'inflammation':
+		ta_list = 'ai'
+		return ta_list.split()
+	else:
+		pass
+def remove_stopwords(query):
+	stopwords = ['acute-on-chronic', 'acute', 'chronic',
+	             'diseases of the', '-19', '- 19', '19', '.']
+	if query is not None:
+		querywords = query.split()
+		resultwords = [word for word in querywords if word.lower() not in stopwords]
+		result = ' '.join(resultwords)
+		return result
+	else:
+		''
+def gb_2_us(text, mydict):
+	try:
+		for us, gb in mydict.items():
+			text = text.replace(gb, us)
+			return text
+	except:
+		return ''
+def fix_text_with_dict(text, mydict):
+	text = ','.join([repl_dict.get(i, i) for i in text.split(', ')])
+	return text
+def replace_text(mytext):
+	cancer = ['cancer', 'neoplasm', 'carcinoma', 'lymphoma', 'adenoma', 'myoma', 'meningioma',
+	          'malignancy', 'tumor', 'malignancies', 'chemotherapy']
+	# fracture = ['fractures', 'fracture']
+	heart_failure = ['heart failure', 'cardiac']
+	ectomy = 'prostatectomy'
+	covid = 'covid'
+	transplant = 'transplant'
+	healthy = 'healthy'
+	park = 'parkinson'
+	allergy = ['allergy', 'allergic']
+	virus = 'virus'
+	cornea = ['cornea', 'eye', 'ocular', 'macular']
+	vaccine = 'vaccines'
+	ureter = 'ureter'
+	mutation = 'mutation'
+	stemcell = 'stem cells'
+	behavior = ['behavior', 'depressive', 'depression', 'anxiety', 'satisfaction', 'grief']
+	molar = ['molar', 'dental', 'maxillary']
+	diet = 'diet'
+	biopsy = 'biopsy'
+	physiology = 'physiology'
+	infection = ['infection', 'bacteremia', 'fungemia']
+	preg = ['pregnancy', 'pregnant', 'labor', 'birth']
+	imaging = ['x-ray', 'imaging', 'mri']
+	surgery = 'surgery'
+	angina = 'angina'
+	use_disorder = ['use disorder', 'obsessive', 'panic', 'posttraumatic stress',
+	                'post-traumatic stress', 'schizophrenia']
+	if mytext:
+		try:
+			if any(text in mytext.lower() for text in cancer):
+				mytext = 'neoplasms'
+				return mytext
+			if any(text in mytext.lower() for text in heart_failure):
+				mytext = 'cardiovascular diseases'
+				return mytext
+			if covid in mytext.lower():
+				mytext = 'covid-19'
+				return mytext
+			if ectomy in mytext.lower():
+				mytext = 'urogenital surgical procedures'
+				return mytext
+			if transplant in mytext.lower():
+				mytext = 'body regions'
+				return mytext
+			if healthy in mytext.lower():
+				mytext = 'healthy volunteers'
+				return mytext
+			if any(text in mytext.lower() for text in allergy):
+				mytext = 'immune system diseases'
+				return mytext
+			if park in mytext.lower():
+				mytext = 'parkinson disease'
+				return mytext
+			if park in mytext.lower():
+				mytext = 'immune system diseases'
+				return mytext
+			if virus in mytext.lower():
+				mytext = 'viruses'
+				return mytext
+			if any(text in mytext.lower() for text in cornea):
+				mytext = 'eye diseases'
+				return mytext
+			if vaccine in mytext.lower():
+				mytext = 'vaccines'
+				return mytext
+			if ureter in mytext.lower():
+				mytext = 'ureter'
+				return mytext
+			if mutation in mytext.lower():
+				mytext = 'mutation'
+				return mytext
+			if stemcell in mytext.lower():
+				mytext = 'stem cells'
+				return mytext
+			if any(text in mytext.lower() for text in behavior):
+				mytext = 'behavior'
+				return mytext
+			if any(text in mytext.lower() for text in molar):
+				mytext = 'molar'
+				return mytext
+			if diet in mytext.lower():
+				mytext = 'diet'
+				return mytext
+			if biopsy in mytext.lower():
+				mytext = 'biopsy'
+				return mytext
+			if physiology in mytext.lower():
+				mytext = 'physiology'
+				return mytext
+			if any(text in mytext.lower() for text in infection):
+				mytext = 'infections'
+				return mytext
+			if any(text in mytext.lower() for text in preg):
+				mytext = 'reproductive and urinary physiological phenomena'
+				return mytext
+			if any(text in mytext.lower() for text in imaging):
+				mytext = 'diagnosis'
+				return mytext
+			if surgery in mytext.lower():
+				mytext = 'medicine'
+				return mytext
+			if angina in mytext.lower():
+				mytext = 'angina pectoris'
+				return mytext
+			if any(text in mytext.lower() for text in use_disorder):
+				mytext = 'mental disorders'
+				return mytext
+			else:
+				return mytext
+		except:
+			return ''
+# For studies in CTgov
+def is_nct(col_value):
+	# Returns mesh term list based on NCT ID
+	val = col_value[:3]
+	if val == 'NCT':
+		try:
+			if col_value in df_mesh_ct.values:
+				mesh_term_list = meshtrm_lst_xtract(col_value)
+				l2map = type_extract(mesh_term_list)
+				return l2map
+		except:
+			pass
+	else:
+		'Study Not in Database, Please enter condition or conditions treated'
+	return
+# For studies not in CTgov
+def is_not_nct(col_value):
+	# Returns mesh term list based on NCT ID
+	# Returns disease type l2 tag in Mesh dictionary
+	if col_value is not None:
+		mesh_term_list = col_value
+		l2map = type_extract(mesh_term_list)
+		return l2map
+	else:
+		None
+	return