Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /SwissProt /KeyWList.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 2 years ago

raw

history blame contribute delete

3.47 kB

	# Copyright 1999 by Jeffrey Chang. All rights reserved.
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Code to parse the keywlist.txt file from SwissProt/UniProt.

	See:
	- https://www.uniprot.org/docs/keywlist.txt

	Classes:
	 - Record            Stores the information about one keyword or one category
	                     in the keywlist.txt file.

	Functions:
	 - parse             Parses the keywlist.txt file and returns an iterator to
	                     the records it contains.

	"""


	class Record(dict):
	    """Hold one keyword entry or one category entry of keywlist.txt.

	    A Record is a plain Python dictionary whose keys are the two-letter
	    line codes used in the keywlist.txt file::

	        ---------  ---------------------------  ----------------------------
	        Line code  Content                      Occurrence in an entry
	        ---------  ---------------------------  ----------------------------
	        ID         Identifier (keyword)         Once; starts a keyword entry
	        IC         Identifier (category)        Once; starts a category entry
	        AC         Accession (KW-xxxx)          Once
	        DE         Definition                   Once or more
	        SY         Synonyms                     Optional; once or more
	        GO         Gene ontology (GO) mapping   Optional; once or more
	        HI         Hierarchy                    Optional; once or more
	        WW         Relevant WWW site            Optional; once or more
	        CA         Category                     Once per keyword entry;
	                                                absent in category entries

	    The line codes that may occur more than once (DE, SY, GO, HI, WW) are
	    always present and start out as empty lists; the remaining keys only
	    exist once a value has been stored under them.
	    """

	    def __init__(self):
	        """Create an empty record with the repeatable fields preset to []."""
	        super().__init__()
	        # A fresh list per key, inserted in the same order as the table above.
	        self.update((line_code, []) for line_code in ("DE", "SY", "GO", "HI", "WW"))


	def parse(handle):
	    """Parse the keyword list from file handle.

	    Returns a generator object which yields keyword entries as
	    Bio.SwissProt.KeyWList.Record() object.

	    Arguments:
	     - handle - an iterable of text lines, typically an open keywlist.txt
	       file handle.

	    Every data line is expected to consist of a two-letter line code,
	    three spaces, and the value starting at column 6.  Lines up to the
	    first ID/IC line are treated as header and skipped; a line starting
	    with a long run of dashes marks the footer, which is read and
	    discarded so that the handle is fully consumed.  Lines that do not have
	    the "code + three spaces" shape (e.g. blank lines) are ignored.

	    In each yielded record the DE and SY lines are joined into a single
	    space-separated string, while GO, HI and WW stay lists with one item
	    per line.  A record is only yielded when its terminating "//" line is
	    seen.

	    Raises ValueError for a well-formed data line with an unknown line code.
	    """
	    # Share one iterator between the three stages below so that each stage
	    # resumes where the previous one stopped, even for a list of lines.
	    lines = iter(handle)
	    record = Record()
	    # First, skip the header - look for the line that starts the first entry
	    for line in lines:
	        if line.startswith(("ID   ", "IC   ")):
	            record[line[:2]] = line[5:].strip()
	            break
	    # Now parse the records
	    for line in lines:
	        if line.startswith("-------------------------------------"):
	            # We have reached the footer
	            break
	        key = line[:2]
	        if key == "//":
	            # End of entry: collapse the free-text fields and hand it out
	            record["DE"] = " ".join(record["DE"])
	            record["SY"] = " ".join(record["SY"])
	            yield record
	            record = Record()
	        elif line[2:5] == "   ":
	            # Columns 3-5 must be blank; the value starts at column 6
	            value = line[5:].strip()
	            if key in ("ID", "IC", "AC", "CA"):
	                record[key] = value
	            elif key in ("DE", "SY", "GO", "HI", "WW"):
	                record[key].append(value)
	            else:
	                raise ValueError(f"Cannot parse line '{line.strip()}'")
	    # Read the footer and throw it away
	    for line in lines:
	        pass