from builtins import any as b_any


# Message returned when a record carries no usable subcellular location data.
_NO_LOCATION_MESSAGE = 'no location available from database'

# Substrings looked for (case-insensitively) in FUNCTION comment texts.
_TRANSCRIPTION_KEY_TERMS = ('transcription', 'regulator')


def extract_uniprot_locations(protein):
    """Return the subcellular locations of a protein record as one string.

    The first comment whose ``commentType`` is ``'SUBCELLULAR LOCATION'`` and
    that has a ``'subcellularLocations'`` entry is used; the ``value`` of each
    of its locations is joined with commas.

    Args:
        protein: Mapping describing one protein; its optional ``'comments'``
            entry is a sequence of comment mappings.

    Returns:
        A comma-separated string of location names, or
        ``'no location available from database'`` when the record has no
        ``'comments'`` entry or no usable subcellular location comment.
    """
    if 'comments' not in protein:
        return _NO_LOCATION_MESSAGE

    for comment in protein['comments']:
        if (comment['commentType'] == 'SUBCELLULAR LOCATION'
                and 'subcellularLocations' in comment):
            return ','.join(
                entry['location']['value']
                for entry in comment['subcellularLocations']
            )

    # Comments exist but none describes a subcellular location. Returning the
    # sentinel (instead of letting an IndexError escape) keeps callers from
    # mistaking "no location data" for "accession not found".
    return _NO_LOCATION_MESSAGE


def get_protein_by_accession(accession, proteins):
    """Return the first record in *proteins* whose accession matches.

    Args:
        accession: Value compared against each record's
            ``'primaryAccession'`` entry.
        proteins: Iterable of protein mappings.

    Returns:
        The first matching protein mapping.

    Raises:
        IndexError: If no record has the given accession.
    """
    for prot in proteins:
        if prot['primaryAccession'] == accession:
            return prot
    # IndexError is kept as the "not found" signal because callers catch it.
    raise IndexError('no protein with primaryAccession %r' % (accession,))


def get_location_from_acession(accession, proteins):
    """Look up a protein by accession and return its location string.

    Args:
        accession: Primary accession to look up.
        proteins: Iterable of protein mappings.

    Returns:
        The result of ``extract_uniprot_locations`` for the matching record,
        or an "accession not found" message when no record matches.
    """
    # Only the lookup is guarded, so the "not found" message is reserved for
    # accessions that are genuinely absent from *proteins*.
    try:
        protein = get_protein_by_accession(accession, proteins)
    except IndexError:
        return 'Accession not found, maybe ir was merged/renamed ?'
    return extract_uniprot_locations(protein)


def is_in_nucleus(locations):
    """Tell whether any location text mentions the nucleus.

    Args:
        locations: Either a single string (for example the comma-joined
            output of ``extract_uniprot_locations``) or an iterable of
            location strings.

    Returns:
        ``'is'`` if ``'nucleus'`` occurs (case-insensitively) in any location,
        ``'is not'`` otherwise, and ``'not available'`` when *locations*
        cannot be inspected (e.g. ``None`` or non-string items).
    """
    try:
        # A bare string would otherwise be iterated character by character,
        # and no single character can contain 'nucleus'.
        if isinstance(locations, str):
            locations = [locations]
        if b_any('nucleus' in loc.lower() for loc in locations):
            return 'is'
        return 'is not'
    except Exception:
        # Deliberate best-effort: any malformed input maps to the sentinel.
        return 'not available'


def is_transcription_factor(accession, proteins):
    """Guess from FUNCTION comments whether a protein regulates transcription.

    Args:
        accession: Primary accession to look up.
        proteins: Iterable of protein mappings.

    Returns:
        ``'is'`` if any text of a ``'FUNCTION'`` comment contains
        ``'transcription'`` or ``'regulator'`` (case-insensitively),
        ``'is not'`` if none does, and ``'not available'`` when the accession
        is missing or the record lacks the expected fields.
    """
    try:
        protein = get_protein_by_accession(accession, proteins)
        for comment in protein['comments']:
            if comment['commentType'] != 'FUNCTION':
                continue
            for text in comment['texts']:
                description = text['value'].lower()
                if b_any(term in description
                         for term in _TRANSCRIPTION_KEY_TERMS):
                    return 'is'
        return 'is not'
    except Exception:
        # Deliberate best-effort: a missing accession or an unexpected record
        # layout is reported as "not available" rather than raised.
        return 'not available'


def search(values, searchFor):
    """Return the first key whose value holds an item containing *searchFor*.

    Args:
        values: Mapping from keys to iterables of items.
        searchFor: Object tested with ``in`` against each item.

    Returns:
        The first key, in iteration order, for which some item of
        ``values[key]`` contains *searchFor*; ``None`` if there is none.
    """
    for key in values:
        try:
            for item in values[key]:
                if searchFor in item:
                    return key
        except TypeError:
            # Value is not iterable, or an item does not support ``in``:
            # skip this key and keep searching the remaining ones.
            continue
    # Only give up after every key has been examined.
    return None