| """8-bit string definitions for Python 2/3 compatibility |
| |
| Defines the following which allow for dealing with Python 3 breakages: |
| |
| STR_IS_BYTES |
| STR_IS_UNICODE |
| |
| Easily checked booleans for type identities |
| |
| _NULL_8_BYTE |
| |
| An 8-bit byte with NULL (0) value |
| |
| as_8_bit( x, encoding='utf-8') |
| |
| Returns the value as the 8-bit version |
| |
| unicode -- always pointing to the unicode type |
| bytes -- always pointing to the 8-bit bytes type |
| """ |
| import sys |
|
|
| STR_IS_BYTES = True |
|
|
| if sys.version_info[:2] < (2,6): |
| |
| bytes = str |
| else: |
| bytes = bytes |
| try: |
| long = long |
| except NameError as err: |
| long = int |
| if sys.version_info[:2] < (3,0): |
| |
| unicode = unicode |
| _NULL_8_BYTE = '\000' |
| def as_8_bit( x, encoding='utf-8' ): |
| if isinstance( x, unicode ): |
| return x.encode( encoding ) |
| return bytes( x ) |
| integer_types = int,long |
| def as_str( x, encoding='utf-8'): |
| """Produce a native string (i.e. different on python 2 and 3)""" |
| if isinstance(x,bytes): |
| return x |
| elif isinstance(x,unicode): |
| return x.encode(encoding) |
| else: |
| return str(x) |
| else: |
| |
| STR_IS_BYTES = False |
| _NULL_8_BYTE = bytes( '\000','latin1' ) |
| def as_8_bit( x, encoding='utf-8' ): |
| if isinstance( x,unicode ): |
| return x.encode(encoding) |
| elif isinstance( x, bytes ): |
| |
| |
| |
| return x |
| return str(x).encode( encoding ) |
| unicode = str |
| integer_types = int, |
| def as_str( x, encoding='utf-8'): |
| """Produce a native string (i.e. different on python 2 and 3)""" |
| if isinstance(x,unicode): |
| return x |
| elif isinstance(x,bytes): |
| return x.decode(encoding) |
| else: |
| return str(x) |
|
|
| STR_IS_UNICODE = not STR_IS_BYTES |
| if hasattr( sys, 'maxsize' ): |
| maxsize = sys.maxsize |
| else: |
| maxsize = sys.maxint |
|
|
| def as_unicode(x,encoding='utf-8'): |
| """Ensure is a unicode object given default encoding""" |
| if isinstance(x,unicode): |
| return x |
| elif isinstance(x,bytes): |
| return x.decode(encoding) |
| else: |
| return unicode(x) |
|
|