Spaces:
Runtime error
Runtime error
| from interproscan import InterproScan | |
| from Bio.Blast.Applications import NcbiblastpCommandline | |
| from utils.utils import extract_interproscan_metrics, get_seqnid, extract_blast_metrics, rename_interproscan_keys | |
| import os | |
| import json | |
# Input FASTA file; it is used as the query for BLAST below and is also the
# input of the InterProScan step later in this script.
input_fasta = "evolla_test/test_hq0704_da_w_plddt_mask_hard_idnseqs.fasta"

#####################################################
# run blast
#####################################################
# settings
blast_database = "uniprot_swissprot"
expect_value = 0.01
blast_xml = "evolla_test/test_hq0704_da_w_plddt_mask_hard_blast.xml"

# Used below as a mapping of sequence ID -> sequence string.
seq_dict = get_seqnid(input_fasta)

# Make sure the output directory exists. `exist_ok=True` avoids the
# check-then-create race, and the guard skips the call when the output path has
# no directory component (os.makedirs("") would raise).
output_dir = os.path.dirname(blast_xml)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# NOTE(review): recent Biopython releases deprecate the Bio.Application
# command-line wrappers -- confirm the pinned Biopython version still ships
# NcbiblastpCommandline.
blast_cmd = NcbiblastpCommandline(
    query=input_fasta,
    db=blast_database,
    out=blast_xml,
    outfmt=5,  # XML format
    evalue=expect_value,
)
blast_cmd()  # run blastp; the report is written to `blast_xml`

# Attach the query sequence to each per-sequence BLAST summary.
blast_results = extract_blast_metrics(blast_xml)
blast_info = {
    uid: {"sequence": seq_dict[uid], "blast_results": info}
    for uid, info in blast_results.items()
}

# save blast results
# Swap only the file extension; str.replace would also rewrite any other
# ".xml" occurrence inside the path.
blast_json = os.path.splitext(blast_xml)[0] + ".json"
with open(blast_json, "w") as f:
    json.dump(blast_info, f, indent=4)
#####################################################
# run interproscan
#####################################################
# settings
goterms = True
pathways = True
interproscan_json = "evolla_test/test_hq0704_da_w_plddt_mask_hard_interproscan.json"
interproscan_path = "interproscan/interproscan-5.75-106.0/interproscan.sh"
# Signature libraries passed to extract_interproscan_metrics.
# NOTE(review): "SFLM" does not look like an InterProScan member database name
# -- possibly a typo for "SFLD"; confirm against what the extractor matches on.
librarys = [
    "PFAM",
    "PIRSR",
    "PROSITE_PROFILES",
    "SUPERFAMILY",
    "PRINTS",
    "PANTHER",
    "CDD",
    "GENE3D",
    "NCBIFAM",
    "SFLM",
    "MOBIDB_LITE",
    "COILS",
    "PROSITE_PATTERNS",
    "FUNFAM",
    "SMART",
]

interproscan = InterproScan(interproscan_path)
input_args = {
    "fasta_file": input_fasta,
    "goterms": goterms,
    "pathways": pathways,
    # NOTE(review): the argument is called `save_dir` but receives a JSON file
    # path; the same path is read back below -- confirm the wrapper expects this.
    "save_dir": interproscan_json,
}
interproscan.run(**input_args)  # run InterProScan

interproscan_results = extract_interproscan_metrics(
    interproscan_json,
    librarys=librarys,
)

# The extracted results are looked up by sequence string, not by sequence ID.
# A sequence missing from the results raises KeyError here.
interproscan_info = {
    seq_id: {
        "sequence": seq,
        "interproscan_results": rename_interproscan_keys(interproscan_results[seq]),
    }
    for seq_id, seq in seq_dict.items()
}

# save interproscan results
# NOTE(review): this overwrites the file that was just read by
# extract_interproscan_metrics with the processed results.
with open(interproscan_json, "w") as f:
    json.dump(interproscan_info, f, indent=4)