Spaces:

thexForce
/

originbio-component1

Sleeping

originbio-component1 / fetch_fasta.py

Create fetch_fasta.py

654ad08 verified about 2 months ago

977 Bytes

	import requests
	import re

	def _first_fasta_sequence(fasta_text):
	    """Return the residues of the first FASTA record in ``fasta_text``.

	    The first line starting with ``>`` is treated as the header; every
	    following line up to the next ``>`` line (or the end of the text) is
	    sequence data. Whitespace inside sequence lines is discarded.

	    Returns:
	        The concatenated sequence string, or ``None`` when the text has no
	        header line or the first record carries no sequence data.
	    """
	    chunks = None  # stays None until the first header line is seen
	    for line in fasta_text.splitlines():
	        if line.startswith(">"):
	            if chunks is not None:
	                # A second header marks the end of the first record.
	                break
	            chunks = []
	        elif chunks is not None:
	            chunks.append("".join(line.split()))

	    if not chunks:
	        return None
	    return "".join(chunks) or None


	def FetchFasta_Uniprot(gene_symbol, organism_id=9606, timeout=30):
	    """Fetch the first protein sequence UniProtKB returns for a gene symbol.

	    Queries the UniProt REST search endpoint in FASTA format for
	    ``gene:<gene_symbol> AND organism_id:<organism_id>`` and extracts the
	    sequence of the first record in the response.

	    The header is separated from the sequence by line structure rather
	    than by splitting on ``SV=`` and deleting digits, so records whose
	    header lacks an ``SV=`` field no longer raise ``IndexError``.

	    Args:
	        gene_symbol: Gene symbol to search for. ``None``, empty, or
	            whitespace-only values are treated as "not found" without
	            sending a request.
	        organism_id: Organism identifier used in the query. Defaults to
	            9606, the value that was previously hard-coded.
	        timeout: Seconds passed to ``requests.get`` as its timeout so a
	            stalled connection cannot block the caller indefinitely.

	    Returns:
	        The sequence as a single string with no whitespace, or ``None``
	        when nothing could be extracted (a message is printed in that
	        case). The sequence is also printed on success.

	    Raises:
	        requests.RequestException: If the request itself fails (for
	            example a connection error or timeout).
	    """
	    symbol = "" if gene_symbol is None else str(gene_symbol).strip()

	    sequence = None
	    if symbol:
	        response = requests.get(
	            "https://rest.uniprot.org/uniprotkb/search",
	            # Let requests build and percent-encode the query string.
	            params={
	                "query": f"gene:{symbol} AND organism_id:{organism_id}",
	                "format": "fasta",
	            },
	            timeout=timeout,
	        )
	        # Only parse successful responses; an error body is not FASTA.
	        if response.ok:
	            sequence = _first_fasta_sequence(response.text)

	    if sequence is None:
	        print("No sequence found after the first header.")
	        return None

	    print(sequence)
	    return sequence