Spaces:
Sleeping
Sleeping
Create fetch_fasta.py
Browse files- fetch_fasta.py +33 -0
fetch_fasta.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
def FetchFasta_Uniprot(gene_symbol):
    """Fetch the first UniProtKB protein sequence for a human gene symbol.

    Queries the UniProt REST search endpoint for entries matching
    ``gene:<gene_symbol>`` restricted to organism_id 9606 (the NCBI taxonomy
    ID for Homo sapiens), requests FASTA output, and returns the residues of
    the first FASTA record in the response. The sequence (or a not-found
    message) is also printed.

    Args:
        gene_symbol: Gene name to search for, e.g. ``"TP53"``.

    Returns:
        The sequence of the first FASTA record as one string with all
        whitespace removed, or ``None`` when the response is not successful
        or contains no FASTA record.

    Raises:
        requests.RequestException: If the request cannot be completed or
            exceeds the timeout.
    """
    organismid = 9606

    # Pass the query through ``params`` so requests URL-encodes the gene
    # symbol instead of splicing raw user input into the URL.
    response = requests.get(
        "https://rest.uniprot.org/uniprotkb/search",
        params={
            "query": f"gene:{gene_symbol} AND organism_id:{organismid}",
            "format": "fasta",
        },
        timeout=30,  # never hang forever on an unresponsive server
    )

    # Only try to parse a successful response; an error body is not FASTA.
    sequence = _first_fasta_sequence(response.text) if response.ok else None

    if sequence is None:
        print("No sequence found after the first header.")
        return None

    print(sequence)
    return sequence


def _first_fasta_sequence(fasta_text):
    """Return the residues of the first record in *fasta_text*, or ``None``."""
    residues = []
    in_record = False
    for line in fasta_text.splitlines():
        if line.lstrip().startswith(">"):
            if in_record:
                # A second header marks the end of the first record.
                break
            in_record = True
        elif in_record:
            # Drop every whitespace character, not just the line ending.
            residues.append("".join(line.split()))
    return "".join(residues) or None
|