Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /SeqIO /AbiIO.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 2 years ago

raw

history blame contribute delete

26.1 kB

	# Copyright 2011 by Wibowo Arindrarto (w.arindrarto@gmail.com)
	# Revisions copyright 2011-2016 by Peter Cock.
	#
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Bio.SeqIO parser for the ABI format.

	ABI is the format used by Applied Biosystems' sequencing machines to store
	sequencing results.

	For more details on the format specification, visit:
	http://www6.appliedbiosystems.com/support/software_community/ABIF_File_Format.pdf

	"""
	import datetime
	import struct
	import sys

	from os.path import basename

	from Bio.Seq import Seq
	from Bio.SeqRecord import SeqRecord

	from .Interfaces import SequenceIterator


	# dictionary for determining which tags goes into SeqRecord annotation
	# each key is tag_name + tag_number
	# if a tag entry needs to be added, just add its key and its key
	# for the annotations dictionary as the value
	# dictionary for tags that require preprocessing before use in creating
	# seqrecords
	_EXTRACT = {
	"TUBE1": "sample_well",
	"DySN1": "dye",
	"GTyp1": "polymer",
	"MODL1": "machine_model",
	}


	# Complete data structure representing 98% of the API. The general section
	# represents the part of the API that's common to ALL instruments, whereas the
	# instrument specific sections are labelled as they are in the ABIF spec.
	#
	# Keys don't seem to clash from machine to machine, so when we parse, we look
	# for ANY key, and store that in the raw ABIF data structure attached to the
	# annotations, with the assumption that anyone parsing the data can look up the
	# spec themselves.
	#
	# Key definitions are retained in case end users want "nice" labels pre-made
	# for them for all of the available fields.
	#
	# Layout: {section name: {tag_name + tag_number: human-readable description}}.
	_INSTRUMENT_SPECIFIC_TAGS = {}

	# fmt: off
	# Tags common to all instruments.
	_INSTRUMENT_SPECIFIC_TAGS["general"] = {
	"APFN2": "Sequencing Analysis parameters file name",
	"APXV1": "Analysis Protocol XML schema version",
	"APrN1": "Analysis Protocol settings name",
	"APrV1": "Analysis Protocol settings version",
	"APrX1": "Analysis Protocol XML string",
	"CMNT1": "Sample Comment",
	"CTID1": "Container Identifier, a.k.a. plate barcode",
	"CTNM1": "Container name, usually identical to CTID, but not necessarily so",
	"CTTL1": "Comment Title",
	"CpEP1": "Capillary type electrophoresis. 1 for a capillary based machine. 0 for a slab gel based machine.",
	"DATA1": "Channel 1 raw data",
	"DATA2": "Channel 2 raw data",
	"DATA3": "Channel 3 raw data",
	"DATA4": "Channel 4 raw data",
	"DATA5": "Short Array holding measured volts/10 (EP voltage) during run",
	"DATA6": "Short Array holding measured milliAmps trace (EP current) during run",
	"DATA7": "Short Array holding measured milliWatts trace (Laser EP Power) during run",
	"DATA8": "Short Array holding measured oven Temperature (polymer temperature) trace during run",
	"DATA9": "Channel 9 processed data",
	"DATA10": "Channel 10 processed data",
	"DATA11": "Channel 11 processed data",
	"DATA12": "Channel 12 processed data",
	# Prism 3100/3100-Avant may provide DATA105
	# 3130/3130-XL may provide DATA105
	# 3530/3530-XL may provide DATA105-199, 9-12, 205-299
	"DSam1": "Downsampling factor",
	"DySN1": "Dye set name",
	"Dye#1": "Number of dyes",
	"DyeN1": "Dye 1 name",
	"DyeN2": "Dye 2 name",
	"DyeN3": "Dye 3 name",
	"DyeN4": "Dye 4 name",
	"DyeW1": "Dye 1 wavelength",
	"DyeW2": "Dye 2 wavelength",
	"DyeW3": "Dye 3 wavelength",
	"DyeW4": "Dye 4 wavelength",
	# 'DyeN5-N': 'Dye 5-N Name',
	# 'DyeW5-N': 'Dye 5-N Wavelength',
	"EPVt1": "Electrophoresis voltage setting (volts)",
	"EVNT1": "Start Run event",
	"EVNT2": "Stop Run event",
	"EVNT3": "Start Collection event",
	"EVNT4": "Stop Collection event",
	"FWO_1": 'Base Order. Sequencing Analysis Filter wheel order. Fixed for 3500 at "GATC"',
	"GTyp1": "Gel or polymer Type",
	"InSc1": "Injection time (seconds)",
	"InVt1": "Injection voltage (volts)",
	"LANE1": "Lane/Capillary",
	"LIMS1": "Sample tracking ID",
	"LNTD1": "Length to detector",
	"LsrP1": "Laser Power setting (micro Watts)",
	"MCHN1": "Instrument name and serial number",
	"MODF1": "Data collection module file",
	"MODL1": "Model number",
	"NAVG1": "Pixels averaged per lane",
	"NLNE1": "Number of capillaries",
	"OfSc1": "List of scans that are marked off scale in Collection. (optional)",
	# OvrI and OvrV are listed as "1-N", and "One for each dye (unanalyzed
	# and/or analyzed data)"
	"OvrI1": "List of scan number indexes that have values greater than 32767 but did not "
	"saturate the camera. In Genemapper samples, this can have indexes with "
	"values greater than 32000. In sequencing samples, this cannot have "
	"indexes with values greater than 32000.",
	"OvrI2": "List of scan number indexes that have values greater than 32767 but did not "
	"saturate the camera. In Genemapper samples, this can have indexes with "
	"values greater than 32000. In sequencing samples, this cannot have "
	"indexes with values greater than 32000.",
	"OvrI3": "List of scan number indexes that have values greater than 32767 but did not "
	"saturate the camera. In Genemapper samples, this can have indexes with "
	"values greater than 32000. In sequencing samples, this cannot have "
	"indexes with values greater than 32000.",
	"OvrI4": "List of scan number indexes that have values greater than 32767 but did not "
	"saturate the camera. In Genemapper samples, this can have indexes with "
	"values greater than 32000. In sequencing samples, this cannot have "
	"indexes with values greater than 32000.",
	"OvrV1": "List of color data values found at the locations listed in the OvrI tag. "
	"There must be exactly as many numbers in this array as in the OvrI array.",
	"OvrV2": "List of color data values found at the locations listed in the OvrI tag. "
	"There must be exactly as many numbers in this array as in the OvrI array.",
	"OvrV3": "List of color data values found at the locations listed in the OvrI tag. "
	"There must be exactly as many numbers in this array as in the OvrI array.",
	"OvrV4": "List of color data values found at the locations listed in the OvrI tag. "
	"There must be exactly as many numbers in this array as in the OvrI array.",
	"PDMF1": "Sequencing Analysis Mobility file name chosen in collection",
	"RMXV1": "Run Module XML schema version",
	"RMdN1": "Run Module name (same as MODF)",
	"RMdX1": "Run Module XML string",
	"RPrN1": "Run Protocol name",
	"RPrV1": "Run Protocol version",
	"RUND1": "Run Started Date",
	"RUND2": "Run Stopped Date",
	"RUND3": "Data Collection Started Date",
	"RUND4": "Data Collection Stopped date",
	"RUNT1": "Run Started Time",
	"RUNT2": "Run Stopped Time",
	"RUNT3": "Data Collection Started Time",
	"RUNT4": "Data Collection Stopped Time",
	"Rate1": "Scanning Rate. Milliseconds per frame.",
	"RunN1": "Run Name",
	"SCAN1": "Number of scans",
	"SMED1": "Polymer lot expiration date",
	"SMLt1": "Polymer lot number",
	"SMPL1": "Sample name",
	"SVER1": "Data collection software version",
	"SVER3": "Data collection firmware version",
	"Satd1": "Array of longs representing the scan numbers of data points, which are flagged as saturated by data collection (optional)",
	"Scal1": "Rescaling divisor for color data",
	"Scan1": "Number of scans (legacy - use SCAN)",
	"TUBE1": "Well ID",
	"Tmpr1": "Run temperature setting",
	"User1": "Name of user who created the plate (optional)",
	}

	# No instrument specific tags
	# _INSTRUMENT_SPECIFIC_TAGS['abi_prism_3100/3100-Avant'] = {
	# }

	# Tags specific to the 3130/3130xl instruments.
	_INSTRUMENT_SPECIFIC_TAGS["abi_3130/3130xl"] = {
	    "CTOw1": "Container owner",
	    "HCFG1": "Instrument Class",
	    "HCFG2": "Instrument Family",
	    "HCFG3": "Official Instrument Name",
	    "HCFG4": "Instrument Parameters",
	    "RMdV1": "Run Module version",
	    # Legacy misspelt key, kept so existing look-ups keep working. Directory
	    # entries store the tag name in exactly four bytes (see _DIRFMT), so the
	    # parser can never produce a key with the five-character name "RMdVa".
	    "RMdVa1": "Run Module version",
	}

	# Tags documented for the 3530/3530xl section. Several keys (CTOw1, HCFG1-4,
	# RunN1) also appear in other sections with different descriptions.
	# NOTE(review): the descriptions below refer to the "3500" series, so the
	# "3530" in the section name may be a typo - confirm against the ABIF spec.
	_INSTRUMENT_SPECIFIC_TAGS["abi_3530/3530xl"] = {
	"AAct1": "Primary Analysis Audit Active indication. True if system auditing was enabled during the last write of this file, "
	"false if system auditing was disabled.",
	"ABED1": "Anode buffer expiration date using ISO 8601 format using the patterns YYYY-MM-DDTHH:MM:SS.ss+/-HH:MM. Hundredths of a second are optional.",
	"ABID1": "Anode buffer tray first installed date",
	"ABLt1": "Anode buffer lot number",
	"ABRn1": "Number of runs (injections) processed with the current Anode Buffer (runs allowed - runs remaining)",
	"ABTp1": "Anode buffer type",
	"AEPt1": "Analysis Ending scan number for basecalling on initial analysis",
	"AEPt2": "Analysis Ending scan number for basecalling on last analysis",
	"APCN1": "Amplicon name",
	"ARTN1": "Analysis Return code. Produced only by 5 Prime basecaller 1.0b3",
	"ASPF1": "Flag to indicate whether adaptive processing worked or not",
	"ASPt1": "Analysis Starting scan number for first analysis",
	"ASPt2": "Analysis Starting scan number for last analysis",
	"AUDT2": "Audit log used across 3500 software (optional)",
	"AVld1": "Assay validation flag (true or false)",
	"AmbT1": "Record of ambient temperature readings",
	"AsyC1": "The assay contents (xml format)",
	"AsyN1": "The assay name",
	"AsyV1": "The assay version",
	"B1Pt1": "Reference scan number for mobility and spacing curves for first analysis",
	"B1Pt2": "Reference scan number for mobility and spacing curves for last analysis",
	"BCTS1": "Basecaller timestamp. Time of completion of most recent analysis",
	"BcRn1": "Basecalling qc code",
	"BcRs1": "Basecalling warnings, a concatenated comma separated string",
	"BcRs2": "Basecalling errors, a concatenated comma separated string",
	"CAED1": "Capillary array expiration",
	"CALt1": "Capillary array lot number",
	"CARn1": "Number of injections processed (including the one of which this sample was a part) through the capillary array",
	"CASN1": "Capillary array serial number",
	"CBED1": "Cathode buffer expiration date",
	"CBID1": "Cathode buffer tray first installed date",
	"CBLt1": "Cathode buffer lot number",
	"CBRn1": "Number of runs (injections) processed with the current Cathode Buffer (runs allowed - runs remaining)",
	"CBTp1": "Cathode buffer type",
	"CLRG1": "Start of the clear range (inclusive).",
	"CLRG2": "Clear range length",
	"CRLn1": "Contiguous read length",
	"CRLn2": 'One of "Pass", "Fail", or "Check"',
	"CTOw1": "The name entered as the Owner of a plate, in the plate editor",
	"CkSm1": "File checksum",
	"DCEv1": "A list of door-close events, separated by semicolon. Door open events are generally paired with door close events.",
	"DCHT1": "Reserved for backward compatibility. The detection cell heater temperature setting from the Run Module. Not used for 3500.",
	"DOEv1": "A list of door-open events, separated by semicolon. Door close events are generally paired with door open events.",
	"ESig2": "Electronic signature record used across 3500 software",
	"FTab1": "Feature table. Can be created by Nibbler for Clear Range.",
	"FVoc1": "Feature table vocabulary. Can be created by Nibbler for Clear Range.",
	"Feat1": "Features. Can be created by Nibbler for Clear Range.",
	"HCFG1": "The Instrument Class. All upper case, no spaces. Initial valid value: CE",
	"HCFG2": "The Instrument Family. All upper case, no spaces. Valid values: 31XX or 37XX for UDC, 35XX (for 3500)",
	"HCFG3": "The official instrument name. Mixed case, minus any special formatting. Initial valid values: 3130, 3130xl, 3730, 3730xl, 3500, 3500xl.",
	"HCFG4": "Instrument parameters. Contains key-value pairs of instrument configuration information, separated by semicolons. "
	"Four parameters are included initially: UnitID=<UNITD number>, CPUBoard=<board type>, "
	"ArraySize=<# of capillaries>, SerialNumber=<Instrument Serial#>.",
	"InjN1": "Injection name",
	"LAST1": "Parameter settings information",
	"NOIS1": "The estimate of rms baseline noise (S/N ratio) for each dye for a successfully analyzed sample. "
	"Corresponds in order to the raw data in tags DATA 1-4. KB basecaller only.",
	"P1AM1": "Amplitude of primary peak, which is not necessarily equal to corresponding signal strength at that position",
	"P1RL1": "Deviation of primary peak position from (PLoc,2), times 100, rounded to integer",
	"P1WD1": "Full-width Half-max of primary peak, times 100, rounded to integer. "
	"Corresponding signal intensity is not necessarily equal to one half of primary peak amplitude",
	"P2AM1": "Amplitude of secondary peak, which is not necessarily equal to corresponding signal strength at that position",
	"P2BA1": "Base of secondary peak",
	"P2RL1": "Deviation of secondary peak position from (PLoc,2), times 100, rounded to integer",
	"PBAS1": "Array of sequence characters edited by user",
	"PBAS2": "Array of sequence characters as called by Basecaller",
	"PCON1": "Array of quality Values (0-255) as edited by user",
	"PCON2": "Array of quality values (0-255) as called by Basecaller",
	"PDMF2": "Mobility file name chosen in most recent analysis (identical to PDMF1)",
	"PLOC1": "Array of peak locations edited by user",
	"PLOC2": "Array of peak locations as called by Basecaller",
	"PRJT1": "SeqScape 2.0 project template name",
	"PROJ4": "SeqScape 2.0 project name",
	"PSZE1": "Plate size. The number of sample positions in the container. Current allowed values: 96, 384.",
	"PTYP1": "Plate type. Current allowed values: 96-Well, 384-Well.",
	"PuSc1": "Median pupscore",
	"QV201": "QV20+ value",
	"QV202": 'One of "Pass", "Fail", or "Check"',
	"QcPa1": "QC parameters",
	"QcRn1": "Trimming and QC code",
	"QcRs1": "QC warnings, a concatenated comma separated string",
	"QcRs2": "QC errors, a concatenated comma separated string",
	"RGOw1": "The name entered as the Owner of a Results Group, in the Results Group Editor. Implemented as the user name from the results group.",
	"RInj1": "Reinjection number. The reinjection number that this sample belongs to. Not present if there was no reinjection.",
	"RNmF1": "Raman normalization factor",
	"RevC1": "for whether the sequence has been complemented",
	"RunN1": "Run name (which, for 3500, is different from injection name)",
	"S/N%1": "Signal strength for each dye",
	"SMID1": "Polymer first installed date",
	"SMRn1": "Number of runs (injections) processed with the current polymer (runs allowed - runs remaining)",
	"SPAC1": "Average peak spacing used in last analysis",
	"SPAC2": "Basecaller name - corresponds to name of bcp file.",
	"SPAC3": "Average peak spacing last calculated by the Basecaller.",
	"SPEC1": "Sequencing Analysis Specimen Name",
	"SVER2": "Basecaller version number",
	"SVER4": "Sample File Format Version String",
	"ScPa1": "The parameter string of size caller",
	"ScSt1": "Raw data start point. Set to 0 for 3500 data collection.",
	"SpeN1": "Active spectral calibration name",
	"TrPa1": "Trimming parameters",
	"TrSc1": "Trace score.",
	"TrSc2": 'One of "Pass", "Fail", or "Check"',
	"phAR1": "Trace peak aria ratio",
	"phCH1": 'Chemistry type ("term", "prim", "unknown"), based on DYE_1 information',
	"phDY1": 'Dye ("big", "d-rhod", "unknown"), based on mob file information',
	"phQL1": "Maximum Quality Value",
	"phTR1": "Set Trim region",
	"phTR2": "Trim probability",
	}

	# Tags specific to the 3730/3730xl instruments (a single tag is listed).
	_INSTRUMENT_SPECIFIC_TAGS["abi_3730/3730xl"] = {
	"BufT1": "Buffer tray heater temperature (degrees C)",
	}
	# fmt: on

	# dictionary for data unpacking format
	# Maps the element type code read from a directory entry to the struct
	# format characters for that type. _parse_tag_data prefixes the entry with
	# ">" (big-endian) and, when there is more than one element, a repeat count.
	# Codes absent from this table (e.g. 9) make _parse_tag_data return None.
	_BYTEFMT = {
	1: "b", # byte
	2: "s", # char
	3: "H", # word
	4: "h", # short
	5: "i", # long
	6: "2i", # rational, legacy unsupported
	7: "f", # float
	8: "d", # double
	10: "h2B", # date
	11: "4B", # time
	12: "2i2b", # thumb
	13: "B", # bool
	14: "2h", # point, legacy unsupported
	15: "4h", # rect, legacy unsupported
	16: "2i", # vPoint, legacy unsupported
	17: "4i", # vRect, legacy unsupported
	18: "s", # pString
	19: "s", # cString
	20: "2i", # tag, legacy unsupported
	}
	# header data structure (excluding 4 byte ABIF marker)
	# Big-endian fields, in order: file version (H), tag name (4s), tag number
	# (I), element type code and element size (2H), then number of elements,
	# data size and data offset (3I). _abi_parse_header reads indices 4, 5 and 7.
	_HEADFMT = ">H4sI2H3I"
	# directory data structure
	# Big-endian fields, in order: tag name (4s), tag number (I), element type
	# code and element size (2H), then number of elements, data size, data
	# offset and data handle (4I). The data offset field starts at byte 20.
	_DIRFMT = ">4sI2H4I"

	# Flat list of every tag key from every instrument section, in section
	# order. A key defined in more than one section is listed once per section.
	__global_tag_listing = []
	for tag in _INSTRUMENT_SPECIFIC_TAGS.values():
	    __global_tag_listing.extend(tag)


	def _get_string_tag(opt_bytes_value, default=None):
	"""Return the string value of the given an optional raw bytes tag value.

	If the bytes value is None, return the given default value.

	"""
	if opt_bytes_value is None:
	return default
	try:
	return opt_bytes_value.decode()
	except UnicodeDecodeError:
	return opt_bytes_value.decode(encoding=sys.getdefaultencoding())


	class AbiIterator(SequenceIterator):
	    """Parser for Abi files."""

	    def __init__(self, source, trim=False):
	        """Return an iterator for the Abi file format.

	        Arguments:
	         - source - input passed on to SequenceIterator, which is asked for
	           binary mode.
	         - trim - if True, the record of a sequencing (non-FSA) file is
	           passed through _abi_trim before being yielded.
	        """
	        self.trim = trim
	        super().__init__(source, mode="b", fmt="ABI")

	    def parse(self, handle):
	        """Start parsing the file, and return a SeqRecord generator.

	        Raises ValueError if the file is empty and OSError if it does not
	        start with the ``ABIF`` marker. The rest of the file is only read
	        once the returned generator is advanced.
	        """
	        # check if input file is a valid Abi file
	        marker = handle.read(4)
	        if not marker:
	            # handle empty file gracefully
	            raise ValueError("Empty file.")
	        if marker != b"ABIF":
	            raise OSError(f"File should start ABIF, not {marker!r}")
	        return self.iterate(handle)

	    def iterate(self, handle):
	        """Parse the file and generate SeqRecord objects.

	        Exactly one record is yielded. A file with neither a PBAS1 nor a
	        PBAS2 tag is treated as an FSA file and gets an empty sequence.

	        Raises ValueError if trimming was requested for a sequencing file
	        that has no quality values.
	        """
	        # dirty hack for handling time information
	        times = {"RUND1": "", "RUND2": "", "RUNT1": "", "RUNT2": ""}

	        # initialize annotations (every extracted annotation defaults to None)
	        annot = dict.fromkeys(_EXTRACT.values())

	        # parse header and extract data from directories
	        header = struct.unpack(_HEADFMT, handle.read(struct.calcsize(_HEADFMT)))

	        # Set default sample ID value, which we expect to be present in most
	        # cases in the SMPL1 tag, but may be missing.
	        sample_id = "<unknown id>"

	        raw = {}
	        seq = qual = None
	        for tag_name, tag_number, tag_data in _abi_parse_header(header, handle):
	            key = tag_name + str(tag_number)
	            raw[key] = tag_data

	            # PBAS2 is base-called sequence, only available in 3530
	            if key == "PBAS2":
	                seq = tag_data.decode()
	            # PCON2 is quality values of base-called sequence
	            elif key == "PCON2":
	                # Each byte is one quality value; iterating the bytes gives
	                # the integers directly. Decoding to text first would fail
	                # (or merge bytes) for values of 128 and above.
	                qual = list(tag_data)
	            # SMPL1 is sample id entered before sequencing run, it must be
	            # a string.
	            elif key == "SMPL1":
	                sample_id = _get_string_tag(tag_data)
	            elif key in times:
	                times[key] = tag_data
	            elif key in _EXTRACT:
	                annot[_EXTRACT[key]] = tag_data

	        # A file may carry only the user-edited calls (PBAS1/PCON1). It is not
	        # an FSA file, so use those rather than building a Seq from None.
	        if seq is None and "PBAS1" in raw:
	            seq = raw["PBAS1"].decode()
	            if qual is None and "PCON1" in raw:
	                qual = list(raw["PCON1"])

	        # set time annotations
	        annot["run_start"] = f"{times['RUND1']} {times['RUNT1']}"
	        annot["run_finish"] = f"{times['RUND2']} {times['RUNT2']}"

	        # raw data (for advanced end users benefit)
	        annot["abif_raw"] = raw

	        # fsa check
	        is_fsa_file = all(tn not in raw for tn in ("PBAS1", "PBAS2"))

	        # use the file name as SeqRecord.name if available
	        extension = ".fsa" if is_fsa_file else ".ab1"
	        try:
	            file_name = basename(handle.name).replace(extension, "")
	        except (AttributeError, TypeError):
	            # no name attribute, or a name that is not a str path
	            # (e.g. an integer file descriptor)
	            file_name = ""

	        if is_fsa_file:
	            sample_id = _get_string_tag(raw.get("LIMS1"), sample_id)
	            description = _get_string_tag(raw.get("CTID1"), "<unknown description>")
	            record = SeqRecord(
	                Seq(""),
	                id=sample_id,
	                name=file_name,
	                description=description,
	                annotations=annot,
	            )
	        else:
	            record = SeqRecord(
	                Seq(seq),
	                id=sample_id,
	                name=file_name,
	                description="",
	                annotations=annot,
	            )

	        if qual:
	            # Expect this to be missing for FSA files.
	            record.letter_annotations["phred_quality"] = qual
	        elif self.trim and not is_fsa_file:
	            raise ValueError(
	                "The 'abi-trim' format can not be used for files without"
	                " quality values."
	            )

	        if self.trim and not is_fsa_file:
	            record = _abi_trim(record)

	        record.annotations["molecule_type"] = "DNA"
	        yield record


	def _AbiTrimIterator(handle):
	    """Build an AbiIterator with trimming enabled for the given handle (PRIVATE)."""
	    trimming_iterator = AbiIterator(handle, trim=True)
	    return trimming_iterator


	def _abi_parse_header(header, handle):
	    """Yield the contents of every directory entry (PRIVATE).

	    Arguments:
	     - header - tuple unpacked from the file header with _HEADFMT; indices
	       4, 5 and 7 are used as the directory entry size, the number of
	       entries and the offset of the first entry.
	     - handle - seekable binary handle of the ABIF file.

	    Yields one ``(tag_name, tag_number, data)`` tuple per entry, where
	    ``data`` is the value returned by _parse_tag_data.
	    """
	    # header structure (after ABIF marker):
	    # file version, tag name, tag number,
	    # element type code, element size, number of elements
	    # data size, data offset, handle (not file handle)
	    head_elem_size = header[4]
	    head_elem_num = header[5]
	    head_offset = header[7]
	    dir_entry_size = struct.calcsize(_DIRFMT)

	    for index in range(head_elem_num):
	        start = head_offset + index * head_elem_size
	        handle.seek(start)
	        dir_entry = struct.unpack(_DIRFMT, handle.read(dir_entry_size))

	        tag_name = dir_entry[0].decode()
	        tag_number = dir_entry[1]
	        elem_code = dir_entry[2]
	        elem_num = dir_entry[4]
	        data_size = dir_entry[5]
	        data_offset = dir_entry[6]
	        # if data size <= 4 bytes, data is stored inside the entry itself, in
	        # the data offset field, which starts 20 bytes into the entry
	        if data_size <= 4:
	            data_offset = start + 20
	        handle.seek(data_offset)
	        data = handle.read(data_size)
	        yield tag_name, tag_number, _parse_tag_data(elem_code, elem_num, data)


	def _abi_trim(seq_record):
	"""Trims the sequence using Richard Mott's modified trimming algorithm (PRIVATE).

	Arguments:
	- seq_record - SeqRecord object to be trimmed.

	Trimmed bases are determined from their segment score, which is a
	cumulative sum of each base's score. Base scores are calculated from
	their quality values.

	More about the trimming algorithm:
	http://www.phrap.org/phredphrap/phred.html
	http://resources.qiagenbioinformatics.com/manuals/clcgenomicsworkbench/650/Quality_trimming.html
	"""
	start = False # flag for starting position of trimmed sequence
	segment = 20 # minimum sequence length
	trim_start = 0 # init start index
	cutoff = 0.05 # default cutoff value for calculating base score

	if len(seq_record) <= segment:
	return seq_record
	else:
	# calculate base score
	score_list = [
	cutoff - (10 ** (qual / -10.0))
	for qual in seq_record.letter_annotations["phred_quality"]
	]

	# calculate cumulative score
	# if cumulative value < 0, set it to 0
	# first value is set to 0, because of the assumption that
	# the first base will always be trimmed out
	cummul_score = [0]
	for i in range(1, len(score_list)):
	score = cummul_score[-1] + score_list[i]
	if score < 0:
	cummul_score.append(0)
	else:
	cummul_score.append(score)
	if not start:
	# trim_start = value when cumulative score is first > 0
	trim_start = i
	start = True

	# trim_finish = index of highest cumulative score,
	# marking the end of sequence segment with highest cumulative score
	trim_finish = cummul_score.index(max(cummul_score))

	return seq_record[trim_start:trim_finish]


	def _parse_tag_data(elem_code, elem_num, raw_data):
	    """Return single data value (PRIVATE).

	    Arguments:
	     - elem_code - What kind of data
	     - elem_num - How many data points
	     - raw_data - bytes of the tag's data, to be unpacked

	    Returns None for an element type code that is not in _BYTEFMT.
	    Dates and times are returned as strings, booleans as bool, and
	    pString / cString values without their leading length byte / trailing
	    byte. Any other value is returned as unpacked: a bare value when there
	    is one item, otherwise a tuple.

	    Raises ValueError if ``raw_data`` does not have the size implied by
	    ``elem_code`` and ``elem_num``.
	    """
	    if elem_code not in _BYTEFMT:
	        return None

	    # a single element is written without a repeat count
	    count = "" if elem_num == 1 else str(elem_num)
	    fmt = ">" + count + _BYTEFMT[elem_code]

	    # Explicit check instead of ``assert``: the data comes from a file and
	    # assertions are skipped when Python runs with -O.
	    expected_size = struct.calcsize(fmt)
	    if len(raw_data) != expected_size:
	        raise ValueError(
	            f"Tag data holds {len(raw_data)} bytes, but {elem_num} element(s)"
	            f" of type {elem_code} require {expected_size} bytes."
	        )
	    data = struct.unpack(fmt, raw_data)

	    # date and time always keep their tuple, which is expanded below
	    if elem_code == 10:
	        return str(datetime.date(*data))
	    if elem_code == 11:
	        return str(datetime.time(*data[:3]))

	    # no need to use tuple if len(data) == 1
	    if len(data) == 1:
	        data = data[0]

	    # account for different data types
	    if elem_code == 13:
	        return bool(data)
	    if elem_code == 18:
	        # pString: drop the first byte
	        return data[1:]
	    if elem_code == 19:
	        # cString: drop the last byte
	        return data[:-1]
	    return data


	# Running this module as a script does nothing.
	if __name__ == "__main__":
	    pass