Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /SCOP /Cla.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 2 years ago

raw

history blame contribute delete

3.92 kB

	# Copyright 2001 by Gavin E. Crooks. All rights reserved.
	# Revisions copyright 2010 Jeffrey Finkelstein. All rights reserved.
	#
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Handle the SCOP CLAssification file, which describes SCOP domains.

	The file format is described in the SCOP
	"release notes.":http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html
	The latest CLA file can be found
	"elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

	"Release 1.73": http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_1.73
	(July 2008)

	"""

	from . import Residues


	class Record:
	    """One line of a SCOP CLA file, i.e. the classification of one domain.

	    Attributes:
	     - sid - SCOP identifier. e.g. d1danl2
	     - residues - The domain definition as a Residues object
	     - sccs - SCOP concise classification strings.  e.g. b.1.2.1
	     - sunid - SCOP unique identifier for this domain
	     - hierarchy - A dictionary mapping nodetype to sunid, giving the
	       position of this domain in the SCOP hierarchy. See the Scop
	       module for a description of nodetypes. Older versions of
	       Biopython stored this as a list of (key, value) tuples
	       (see Bug 3109).

	    """

	    def __init__(self, line=None):
	        """Create an empty record, or parse ``line`` when one is given."""
	        self.sid = ""
	        self.residues = None
	        self.sccs = ""
	        self.sunid = ""
	        self.hierarchy = {}
	        if line:
	            self._process(line)

	    def _process(self, line):
	        """Fill this record from a single tab-separated CLA line.

	        Raises ValueError when the line does not have exactly six columns.
	        """
	        # Trailing whitespace (including the newline) is not part of the data.
	        line = line.rstrip()
	        fields = line.split("\t")
	        if len(fields) != 6:
	            raise ValueError(f"I don't understand the format of {line}")

	        # sunid arrives as text here; it is converted to an int just below.
	        (self.sid, pdb_code, residue_spec, self.sccs, self.sunid, lineage) = fields
	        self.residues = Residues.Residues(residue_spec)
	        self.residues.pdbid = pdb_code
	        self.sunid = int(self.sunid)

	        # The last column looks like "cl=48724,cf=48725,...".
	        for pair in lineage.split(","):
	            nodetype, node_sunid = pair.split("=")
	            self.hierarchy[nodetype] = int(node_sunid)

	    def __str__(self):
	        """Represent the SCOP classification record as a tab-separated string."""
	        # str(residues) is split on spaces so that each part gets a column
	        # of its own.
	        columns = [
	            self.sid,
	            *str(self.residues).split(" "),
	            self.sccs,
	            self.sunid,
	        ]
	        columns.append(
	            ",".join(
	                "=".join([nodetype, str(node_sunid)])
	                for nodetype, node_sunid in self.hierarchy.items()
	            )
	        )
	        return "\t".join(str(column) for column in columns) + "\n"


	def parse(handle):
	    """Lazily turn the lines of a CLA file into Record objects.

	    Arguments:
	     - handle - file-like object.

	    Lines that start with "#" are treated as comments and skipped; every
	    other line is passed to Record and the result is yielded.

	    """
	    data_lines = (row for row in handle if not row.startswith("#"))
	    for row in data_lines:
	        yield Record(row)


	class Index(dict):
	    """A CLA file indexed by SCOP identifiers for rapid random access.

	    The underlying dict maps each SCOP identifier (sid) to the file offset
	    of the line describing that domain. Looking a key up re-opens the
	    file, seeks to that offset and parses the single line into a Record.
	    """

	    def __init__(self, filename):
	        """Create CLA index.

	        Arguments:
	         - filename - The file to index

	        Raises ValueError (from Record) if a non-comment line cannot be
	        parsed.

	        """
	        dict.__init__(self)
	        self.filename = filename
	        with open(self.filename) as f:
	            while True:
	                # Take the offset *before* reading each line. Updating it
	                # only after data lines would leave it stale across "#"
	                # comment lines, so the next record would be indexed at the
	                # comment's offset and could not be read back.
	                position = f.tell()
	                # readline() rather than iteration: tell() is disabled
	                # while a text file is being iterated with next().
	                line = f.readline()
	                if not line:
	                    break
	                if line.startswith("#"):
	                    continue
	                record = Record(line)
	                key = record.sid
	                if key is not None:
	                    self[key] = position

	    def __getitem__(self, key):
	        """Return the Record for ``key``, read from the indexed file.

	        Raises KeyError if ``key`` was not found when the index was built.
	        """
	        position = dict.__getitem__(self, key)

	        with open(self.filename) as f:
	            f.seek(position)
	            line = f.readline()
	            record = Record(line)
	        return record