BOTeome / scripts /utils.py
juan
SQL integration
63ec1fc
from builtins import any as b_any
def extract_uniprot_locations(protein):
    """Return the subcellular locations of *protein* as a comma-joined string.

    *protein* is a mapping (presumably a UniProt entry parsed from JSON --
    confirm against the caller). Only the first comment whose
    ``commentType`` is ``'SUBCELLULAR LOCATION'`` and that carries a
    ``'subcellularLocations'`` list is used; the ``location.value`` of each
    of its items is joined with ``','``.

    Returns ``'no location available from database'`` when the entry has no
    ``'comments'`` key or no usable subcellular-location comment. The latter
    case used to raise ``IndexError``.
    """
    no_location = 'no location available from database'
    if 'comments' not in protein:
        return no_location

    for comment in protein['comments']:
        if (comment['commentType'] == 'SUBCELLULAR LOCATION'
                and 'subcellularLocations' in comment):
            # Only the first matching comment is reported.
            return ','.join(
                entry['location']['value']
                for entry in comment['subcellularLocations']
            )

    # Comments exist, but none of them describes a subcellular location.
    return no_location
def get_protein_by_accession(accession, proteins):
    """Return the first entry of *proteins* whose accession matches.

    Every entry is compared on its ``'primaryAccession'`` key. Raises
    ``IndexError`` when no entry matches *accession*.
    """
    matches = list(
        filter(lambda entry: entry['primaryAccession'] == accession, proteins)
    )
    # Indexing an empty list is what signals "not found" to callers.
    return matches[0]
def get_location_from_acession(accession, proteins):
    """Look up *accession* in *proteins* and return its locations string.

    Returns the value produced by ``extract_uniprot_locations`` for the
    matching entry. Returns ``'Accession not found, maybe ir was
    merged/renamed ?'`` only when the lookup itself finds no entry.

    The lookup and the extraction are guarded separately so that an
    ``IndexError`` raised while extracting locations is reported as a
    missing location rather than as a missing accession.
    """
    try:
        protein = get_protein_by_accession(accession, proteins)
    except IndexError:
        return 'Accession not found, maybe ir was merged/renamed ?'

    try:
        return extract_uniprot_locations(protein)
    except IndexError:
        # The entry exists; it just has no usable location data.
        return 'no location available from database'
def is_in_nucleus(locations):
    """Tell whether any location mentions the nucleus (case-insensitive).

    *locations* may be an iterable of location strings or a single string
    (for example a comma-joined list of locations). A single string is
    checked as a whole; previously it was iterated character by character
    and could never match.

    Returns ``'is'`` when ``'nucleus'`` occurs in at least one location,
    ``'is not'`` when it occurs in none, and ``'not available'`` when
    *locations* cannot be inspected (e.g. ``None`` or non-string items).
    """
    try:
        if isinstance(locations, str):
            # Wrap so the substring test runs on the full text, not per char.
            locations = [locations]
        if b_any('nucleus' in loc.lower() for loc in locations):
            return 'is'
        return 'is not'
    except Exception:
        # Best-effort answer; unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return 'not available'
def is_transcription_factor(accession, proteins):
    """Guess from FUNCTION comments whether a protein regulates transcription.

    Looks up *accession* in *proteins*, then scans the ``texts`` of every
    comment whose ``commentType`` is ``'FUNCTION'`` for the key terms
    ``'transcription'`` or ``'regulator'`` (case-insensitive).

    Returns ``'is'`` as soon as one text contains a key term, ``'is not'``
    when none does, and ``'not available'`` when the lookup or the scan
    fails (unknown accession, entry without ``'comments'``, malformed
    comment).
    """
    key_terms = ('transcription', 'regulator')
    try:
        protein = get_protein_by_accession(accession, proteins)
        for comment in protein['comments']:
            if comment['commentType'] != 'FUNCTION':
                continue
            for text in comment['texts']:
                description = text['value'].lower()
                # b_any is the builtin any(), as imported at the top of the file.
                if b_any(term in description for term in key_terms):
                    return 'is'
        return 'is not'
    except Exception:
        # Best-effort answer; unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return 'not available'
def search(values, searchFor):
    """Return the first key of *values* with an item containing *searchFor*.

    For each key ``k`` the items of ``values[k]`` are tested with
    ``searchFor in item``. A key whose value cannot be indexed or iterated,
    or whose items do not support ``in``, raises ``TypeError`` internally
    and is skipped.

    Returns ``None`` only after every key has been examined. Previously the
    function returned ``None`` on the first non-matching item, so later
    keys were never searched.
    """
    for key in values:
        try:
            for item in values[key]:
                if searchFor in item:
                    return key
        except TypeError:
            # Unsearchable value for this key: move on to the next key.
            continue
    return None