originbio-component1 / fetch_fasta.py
Junaidb's picture
Create fetch_fasta.py
654ad08 verified
raw
history blame contribute delete
977 Bytes
import requests
import re
def _first_fasta_sequence(fasta_text):
    """Return the residues of the first FASTA record in *fasta_text*.

    The header line (the one starting with ``>``) is skipped as a whole, so
    nothing from it (description, ``SV=`` field, digits) can leak into the
    result.  Returns ``None`` when there is no record or the first record
    has no sequence lines.
    """
    sequence_lines = None  # None until the first header line has been seen
    for raw_line in fasta_text.splitlines():
        line = raw_line.strip()
        if line.startswith(">"):
            if sequence_lines is not None:
                break  # a second record begins; only the first is wanted
            sequence_lines = []
        elif line and sequence_lines is not None:
            sequence_lines.append(line)

    if not sequence_lines:
        return None
    # split()/join drops any whitespace embedded inside a sequence line.
    return "".join("".join(sequence_lines).split())


def FetchFasta_Uniprot(gene_symbol, organism_id=9606, *, timeout=30):
    """Fetch the sequence of the first UniProtKB hit for a gene symbol.

    Queries the UniProt REST search endpoint for ``gene:<gene_symbol> AND
    organism_id:<organism_id>`` in FASTA format and returns the sequence of
    the first record in the response as one string without whitespace.

    Args:
        gene_symbol: Gene name to search for.
        organism_id: Value used for the ``organism_id`` query field.
            Defaults to 9606, the value the function always used before.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection raises instead of blocking forever.

    Returns:
        The sequence string, or ``None`` when the response contains no
        FASTA record (no hit, or a non-success HTTP status).

    Raises:
        requests.exceptions.RequestException: On connection failures or
            timeouts; these are not caught here.
    """
    # Let requests build the query string so characters in gene_symbol such
    # as '&', '#' or spaces are escaped instead of altering the request.
    response = requests.get(
        "https://rest.uniprot.org/uniprotkb/search",
        params={
            "query": f"gene:{gene_symbol} AND organism_id:{organism_id}",
            "format": "fasta",
        },
        timeout=timeout,
    )

    # An error response carries no FASTA payload; treat it like "no hit" so
    # callers keep receiving None rather than a new exception type.
    sequence = _first_fasta_sequence(response.text) if response.ok else None
    if sequence is None:
        print("No sequence found after the first header.")
        return None

    # The sequence is echoed to stdout, as the function has always done.
    print(sequence)
    return sequence