File size: 977 Bytes
654ad08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import requests
import re

_UNIPROT_SEARCH_URL = "https://rest.uniprot.org/uniprotkb/search"
_UNIPROT_TIMEOUT_SECONDS = 30


def _first_fasta_sequence(fasta_text):
    """Return the residues of the first FASTA record in *fasta_text*, or None."""
    sequence_lines = []
    inside_record = False
    for line in fasta_text.splitlines():
        if line.startswith(">"):
            if inside_record:
                # A second header starts the next record; stop at the first.
                break
            inside_record = True
        elif inside_record:
            sequence_lines.append(line)

    # Drop any stray whitespace (line wraps, trailing spaces) in the residues.
    sequence = "".join("".join(sequence_lines).split())
    return sequence or None


def FetchFasta_Uniprot(gene_symbol, organism_id=9606):
    """Fetch the protein sequence of the first UniProtKB hit for a gene.

    Queries the UniProtKB search endpoint for entries matching
    ``gene:<gene_symbol> AND organism_id:<organism_id>`` in FASTA format and
    returns the sequence of the first record in the response. The sequence is
    also printed, as is a message when nothing is found.

    Args:
        gene_symbol: Gene name to search for (e.g. ``"TP53"``).
        organism_id: NCBI taxonomy ID used to restrict the search. Defaults
            to 9606 (Homo sapiens).

    Returns:
        The sequence as a single string without whitespace, or ``None`` when
        the request fails with an HTTP error status or the response contains
        no FASTA record.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection failure, timeout, ...).
    """
    # Let requests build and URL-encode the query string so gene symbols
    # containing reserved characters cannot corrupt the URL.
    query = f"gene:{gene_symbol} AND organism_id:{organism_id}"
    response = requests.get(
        _UNIPROT_SEARCH_URL,
        params={"query": query, "format": "fasta"},
        timeout=_UNIPROT_TIMEOUT_SECONDS,
    )

    if not response.ok:
        # An error body is not FASTA; report it instead of trying to parse it.
        print(f"UniProt request failed with HTTP status {response.status_code}.")
        return None

    sequence = _first_fasta_sequence(response.text)
    if sequence is None:
        print("No sequence found after the first header.")
        return None

    print(sequence)
    return sequence