Spaces:
Sleeping
Sleeping
File size: 2,179 Bytes
456f631 63ec1fc 456f631 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | from builtins import any as b_any
def extract_uniprot_locations(protein):
    """Return the subcellular locations of a protein entry as one string.

    Only the first comment whose 'commentType' is 'SUBCELLULAR LOCATION'
    and that carries a 'subcellularLocations' list is used.

    Args:
        protein: Dict-like protein entry. When present, 'comments' must be
            an iterable of dicts that each have a 'commentType' key.

    Returns:
        The location names ('location' -> 'value' of each item) joined
        with ','. The string 'no location available from database' is
        returned when the entry has no 'comments' key or no usable
        subcellular-location comment.
    """
    no_location = 'no location available from database'
    if 'comments' not in protein:
        return no_location

    # Take the first usable comment; the None default avoids the IndexError
    # that indexing an empty filtered list used to raise.
    all_locs = next(
        (
            comment['subcellularLocations']
            for comment in protein['comments']
            if comment['commentType'] == 'SUBCELLULAR LOCATION'
            and 'subcellularLocations' in comment
        ),
        None,
    )
    if all_locs is None:
        return no_location

    return ','.join(loc['location']['value'] for loc in all_locs)
def get_protein_by_accession(accession, proteins):
    """Return the first entry whose 'primaryAccession' equals *accession*.

    Args:
        accession: Value compared against each entry's 'primaryAccession'.
        proteins: Iterable of dict-like entries that each have a
            'primaryAccession' key.

    Returns:
        The first matching entry, in iteration order.

    Raises:
        IndexError: If no entry matches.
    """
    matches = []
    for candidate in proteins:
        if candidate['primaryAccession'] == accession:
            matches.append(candidate)
    # Indexing the (possibly empty) match list is what signals a miss:
    # an unknown accession surfaces as IndexError.
    return matches[0]
def get_location_from_acession(accession, proteins):
    """Look up a protein by accession and return its location string.

    Args:
        accession: Accession passed to ``get_protein_by_accession``.
        proteins: Collection of protein entries passed to
            ``get_protein_by_accession``.

    Returns:
        The result of ``extract_uniprot_locations`` for the matching entry.
        If the lookup raises IndexError, a message saying the accession was
        not found. If the extraction raises IndexError,
        'no location available from database'.
    """
    # Keep the lookup in its own try so that only a failed lookup is
    # reported as a missing accession.
    try:
        protein = get_protein_by_accession(accession, proteins)
    except IndexError:
        return 'Accession not found, maybe ir was merged/renamed ?'

    # An IndexError here means the entry exists but exposes no location
    # data, which is a different condition from an unknown accession.
    try:
        return extract_uniprot_locations(protein)
    except IndexError:
        return 'no location available from database'
def is_in_nucleus(locations):
    """Report whether any location description mentions 'nucleus'.

    The match is a case-insensitive substring test.

    Args:
        locations: Either an iterable of location strings or a single
            string (for example a comma-joined list of locations).

    Returns:
        'is' if a location contains 'nucleus', 'is not' if none does, and
        'not available' if the input cannot be inspected (for example it
        is None or contains non-string items).
    """
    # A bare string would be iterated character by character, so 'nucleus'
    # could never match; treat it as one location description instead.
    if isinstance(locations, str):
        locations = [locations]
    try:
        found = b_any('nucleus' in loc.lower() for loc in locations)
    except Exception:
        # Best-effort answer for unusable input, without swallowing
        # KeyboardInterrupt/SystemExit as a bare except would.
        return 'not available'
    return 'is' if found else 'is not'
def is_transcription_factor(accession, proteins):
    """Guess from FUNCTION comments whether a protein regulates transcription.

    The entry is fetched with ``get_protein_by_accession``. Every text of
    every comment whose 'commentType' is 'FUNCTION' is lower-cased and
    checked for the substrings 'transcription' or 'regulator'.

    Args:
        accession: Accession passed to ``get_protein_by_accession``.
        proteins: Collection of protein entries passed to
            ``get_protein_by_accession``.

    Returns:
        'is' as soon as one FUNCTION text contains a key term, 'is not' if
        none does, and 'not available' if the lookup or the traversal of
        the entry raises (for example unknown accession or missing keys).
    """
    key_terms = ('transcription', 'regulator')
    try:
        protein = get_protein_by_accession(accession, proteins)
        for comment in protein['comments']:
            if comment['commentType'] != 'FUNCTION':
                continue
            for text in comment['texts']:
                description = text['value'].lower()
                for term in key_terms:
                    if term in description:
                        # One hit is enough; no need to count them.
                        return 'is'
        return 'is not'
    except Exception:
        # Best-effort answer for missing or malformed entries, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        return 'not available'
def search(values, searchFor):
    """Return the first key whose values contain *searchFor*.

    Args:
        values: Mapping from keys to iterables of items.
        searchFor: Object tested with ``searchFor in item`` against each
            item (a substring test when both are strings).

    Returns:
        The first key, in iteration order, that has an item containing
        *searchFor*, or None if no key does.
    """
    for key in values:
        try:
            for candidate in values[key]:
                if searchFor in candidate:
                    return key
        except TypeError:
            # values[key] is not iterable, or an item does not support
            # the ``in`` test with searchFor: skip this key.
            continue
    # Give up only after every key has been examined.
    return None