Spaces:
No application file
No application file
| # Copyright 2014 by Kevin Wu. | |
| # Revisions copyright 2014 by Peter Cock. | |
| # All rights reserved. | |
| # | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Provides code to access the REST-style KEGG online API. | |
| This module aims to make the KEGG online REST-style API easier to use. See: | |
| https://www.kegg.jp/kegg/rest/keggapi.html | |
| The KEGG REST-style API provides simple access to a range of KEGG databases. | |
| This works using simple URLs (which this module will construct for you), | |
| with any errors indicated via HTTP error levels. | |
| The functionality is somewhat similar to Biopython's Bio.TogoWS and Bio.Entrez | |
| modules. | |
| Currently KEGG does not provide any usage guidelines (unlike the NCBI whose | |
| requirements are reasonably clear). To avoid risking overloading the service, | |
| Biopython will only allow three calls per second. | |
| References: | |
| Kanehisa, M. and Goto, S.; KEGG: Kyoto Encyclopedia of Genes and Genomes. | |
| Nucleic Acids Res. 28, 29-34 (2000). | |
| """ | |
| import io | |
| from urllib.request import urlopen | |
def _q(op, arg1, arg2=None, arg3=None):
    """Open a KEGG REST request and return a handle to the response.

    The request path is ``op/arg1``, extended with ``arg2`` when it is
    truthy, and further with ``arg3`` when both ``arg2`` and ``arg3`` are
    truthy (``arg3`` on its own is ignored).

    Returns the raw (binary) response object when ``arg2`` is ``"image"``;
    otherwise a UTF-8 text wrapper around the response, with a ``url``
    attribute copied from the response.
    """
    path = f"{op}/{arg1}"
    if arg2:
        path = f"{path}/{arg2}"
        if arg3:
            path = f"{path}/{arg3}"

    response = urlopen("https://rest.kegg.jp/%s" % path)

    # Image payloads are binary, so hand back the undecoded response.
    if arg2 == "image":
        return response

    text_handle = io.TextIOWrapper(response, encoding="UTF-8")
    text_handle.url = response.url
    return text_handle
| # https://www.kegg.jp/kegg/rest/keggapi.html | |
def kegg_info(database):
    """KEGG info - Displays the current statistics of a given database.

    Arguments:
     - database - database or organism (string)

    The argument can be a KEGG database name (e.g. 'pathway' or its
    official abbreviation, 'path'), or a KEGG organism code or T number
    (e.g. 'hsa' or 'T01001' for human).

    A valid list of organism codes and their T numbers can be obtained
    via kegg_info('organism') or https://rest.kegg.jp/list/organism

    Returns a handle.
    """
    # TODO - return a string (rather than the handle?)
    # TODO - cache and validate the organism code / T numbers?
    # TODO - can we parse the somewhat formatted output?
    #
    # Request form:
    #   https://rest.kegg.jp/info/<database>
    #
    #   <database> = pathway | brite | module | disease | drug | environ |
    #                ko | genome | <org> | compound | glycan | reaction |
    #                rpair | rclass | enzyme | genomes | genes | ligand | kegg
    #   <org>      = KEGG organism code or T number
    info_handle = _q("info", database)
    return info_handle
def kegg_list(database, org=None):
    """KEGG list - Entry list for database, or specified database entries.

    Arguments:
     - database - database or organism (string), or a list of up to 100
       strings which are joined with "+" into a single request.
     - org - optional organism (string), see below.

    For the pathway and module databases the optional organism can be
    used to restrict the results. Passing an organism together with any
    other non-empty database string raises ValueError.

    Returns a handle.
    """
    # TODO - split into two functions (dbentries seems separate)?
    if org:
        # https://rest.kegg.jp/list/<database>/<org>
        #
        #   <database> = pathway | module
        #   <org>      = KEGG organism code
        if database in ("pathway", "module"):
            return _q("list", database, org)
        if isinstance(database, str) and database:
            raise ValueError("Invalid database arg for kegg list request.")

    # https://rest.kegg.jp/list/<database>
    #
    #   <database> = pathway | brite | module | disease | drug | environ |
    #                ko | genome | <org> | compound | glycan | reaction |
    #                rpair | rclass | enzyme | organism
    #   <org>      = KEGG organism code or T number
    #
    # https://rest.kegg.jp/list/<dbentries>
    #
    #   <dbentries> = KEGG database entries involving the following <database>
    #   <database>  = pathway | brite | module | disease | drug | environ |
    #                 ko | genome | <org> | compound | glycan | reaction |
    #                 rpair | rclass | enzyme
    #   <org>       = KEGG organism code or T number
    if isinstance(database, list):
        if len(database) > 100:
            raise ValueError("Maximum number of databases is 100 for kegg list query")
        database = "+".join(database)
    return _q("list", database)
def kegg_find(database, query, option=None):
    """KEGG find - Data search.

    Finds entries with matching query keywords or other query data in
    a given database.

    Arguments:
     - database - database or organism (string)
     - query - search terms (string, or list of strings which are joined
       with "+")
     - option - search option (string), see below.

    For the compound and drug database, set option to the string 'formula',
    'exact_mass' or 'mol_weight' to search on that field only. The
    chemical formula search is a partial match irrespective of the order
    of atoms given. The exact mass (or molecular weight) is checked by
    rounding off to the same decimal place as the query data. A range of
    values may also be specified with the minus(-) sign.

    Raises ValueError if an option is given that is not one of the three
    above, or if it is used with a database other than compound or drug.

    Returns a handle.
    """
    # TODO - return list of tuples?
    #
    # https://rest.kegg.jp/find/<database>/<query>/<option>
    #
    #   <database> = compound | drug
    #   <option>   = formula | exact_mass | mol_weight
    #
    # https://rest.kegg.jp/find/<database>/<query>
    #
    #   <database> = pathway | module | disease | drug | environ | ko |
    #                genome | <org> | compound | glycan | reaction | rpair |
    #                rclass | enzyme | genes | ligand
    #   <org>      = KEGG organism code or T number
    option_is_valid = database in ("compound", "drug") and option in (
        "formula",
        "exact_mass",
        "mol_weight",
    )
    if option and not option_is_valid:
        raise ValueError("Invalid option arg for kegg find request.")

    # Join list queries for BOTH request forms; previously a list passed
    # together with an option was interpolated into the URL as its repr.
    if isinstance(query, list):
        query = "+".join(query)

    if option:
        return _q("find", database, query, option)
    return _q("find", database, query)
def kegg_get(dbentries, option=None):
    """KEGG get - Data retrieval.

    Arguments:
     - dbentries - Identifiers (single string, or list of strings), see below.
     - option - One of "aaseq", "ntseq", "mol", "kcf", "image", "kgml",
       "json" (string)

    The input is limited up to 10 entries.
    The input is limited to one pathway entry with the image or kgml option.
    The input is limited to one compound/glycan/drug entry with the image option.

    Raises ValueError for a list of more than 10 entries or for an
    unrecognised option.

    Returns a handle.
    """
    # https://rest.kegg.jp/get/<dbentries>[/<option>]
    #
    #   <dbentries> = KEGG database entries involving the following <database>
    #   <database>  = pathway | brite | module | disease | drug | environ |
    #                 ko | genome | <org> | compound | glycan | reaction |
    #                 rpair | rclass | enzyme
    #   <org>       = KEGG organism code or T number
    #
    #   <option>    = aaseq | ntseq | mol | kcf | image | kgml | json
    if isinstance(dbentries, list):
        if len(dbentries) > 10:
            raise ValueError("Maximum number of dbentries is 10 for kegg get query")
        dbentries = "+".join(dbentries)

    if not option:
        return _q("get", dbentries)

    recognised = ("aaseq", "ntseq", "mol", "kcf", "image", "kgml", "json")
    if option not in recognised:
        raise ValueError("Invalid option arg for kegg get request.")
    return _q("get", dbentries, option)
def kegg_conv(target_db, source_db, option=None):
    """KEGG conv - convert KEGG identifiers to/from outside identifiers.

    Arguments:
     - target_db - Target database
     - source_db - source database, or database entries (a string, or a
       list of strings which are joined with "+")
     - option - Can be "turtle" or "n-triple" (string).

    Raises ValueError if the option is not recognised, or if the
    target/source pair is not a supported conversion (see the request
    grammar in the comments below).

    Returns a handle.
    """
    # https://rest.kegg.jp/conv/<target_db>/<source_db>[/<option>]
    #
    # (<target_db> <source_db>) = (<kegg_db> <outside_db>) |
    #                             (<outside_db> <kegg_db>)
    #
    # For gene identifiers:
    #   <kegg_db>    = <org>
    #   <org>        = KEGG organism code or T number
    #   <outside_db> = ncbi-gi | ncbi-geneid | uniprot
    #
    # For chemical substance identifiers:
    #   <kegg_db>    = drug | compound | glycan
    #   <outside_db> = pubchem | chebi
    #
    # <option> = turtle | n-triple
    #
    # https://rest.kegg.jp/conv/<target_db>/<dbentries>[/<option>]
    #
    # For gene identifiers:
    #   <dbentries> = database entries involving the following <database>
    #   <database>  = <org> | ncbi-gi | ncbi-geneid | uniprot
    #   <org>       = KEGG organism code or T number
    #
    # For chemical substance identifiers:
    #   <dbentries> = database entries involving the following <database>
    #   <database>  = drug | compound | glycan | pubchem | chebi
    #
    # <option> = turtle | n-triple
    if option and option not in ["turtle", "n-triple"]:
        raise ValueError("Invalid option arg for kegg conv request.")
    if isinstance(source_db, list):
        source_db = "+".join(source_db)

    gene_outside_dbs = ("ncbi-gi", "ncbi-geneid", "uniprot")
    chem_kegg_dbs = ("drug", "compound", "glycan")
    # The outside chemical databases are pubchem and chebi. This list used
    # to read ("pubchem", "glycan"), which rejected every chebi conversion
    # and accepted meaningless pairs such as glycan -> glycan.
    chem_outside_dbs = ("pubchem", "chebi")

    is_gene_conv = target_db in gene_outside_dbs or source_db in gene_outside_dbs
    is_chem_conv = (
        target_db in chem_kegg_dbs and source_db in chem_outside_dbs
    ) or (target_db in chem_outside_dbs and source_db in chem_kegg_dbs)

    if not (is_gene_conv or is_chem_conv):
        raise ValueError("Bad argument target_db or source_db for kegg conv request.")

    if option:
        return _q("conv", target_db, source_db, option)
    return _q("conv", target_db, source_db)
def kegg_link(target_db, source_db, option=None):
    """KEGG link - find related entries by using database cross-references.

    Arguments:
     - target_db - Target database
     - source_db - source database, or database entries (a string, or a
       list of strings which are joined with "+")
     - option - Can be "turtle" or "n-triple" (string).

    Raises ValueError if the option is not recognised.

    Returns a handle.
    """
    # https://rest.kegg.jp/link/<target_db>/<source_db>[/<option>]
    #
    #   <target_db> = <database>
    #   <source_db> = <database>
    #
    #   <database> = pathway | brite | module | ko | genome | <org> | compound |
    #                glycan | reaction | rpair | rclass | enzyme | disease |
    #                drug | dgroup | environ
    #
    #   <option> = turtle | n-triple
    #
    # https://rest.kegg.jp/link/<target_db>/<dbentries>[/<option>]
    #
    #   <dbentries> = KEGG database entries involving the following <database>
    #   <database>  = pathway | brite | module | ko | genome | <org> | compound |
    #                 glycan | reaction | rpair | rclass | enzyme | disease |
    #                 drug | dgroup | environ | genes
    #
    #   <option> = turtle | n-triple
    if option and option not in ["turtle", "n-triple"]:
        # The message used to say "kegg conv request" (copied from kegg_conv).
        raise ValueError("Invalid option arg for kegg link request.")

    if isinstance(source_db, list):
        source_db = "+".join(source_db)

    if option:
        return _q("link", target_db, source_db, option)
    return _q("link", target_db, source_db)