|
|
#ifndef Py_CPYTHON_UNICODEOBJECT_H |
|
|
# error "this header file must not be included directly" |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Py_UNICODE is an alias for the platform wchar_t.  PY_UNICODE_TYPE names the
   same underlying type for code that needs it as a macro. */
#define PY_UNICODE_TYPE wchar_t
typedef PY_UNICODE_TYPE Py_UNICODE;
|
|
|
|
|
|
|
|
|
|
|
/* Default USE_UNICODE_WCHAR_CACHE to 1 unless it was defined earlier. */
#if !defined(USE_UNICODE_WCHAR_CACHE)
#  define USE_UNICODE_WCHAR_CACHE 1
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Whitespace test for a code point.

   Values below 128 are answered from the _Py_ascii_whitespace lookup table;
   every other value is delegated to _PyUnicode_IsWhitespace().

   The table is indexed with the same Py_UCS4 conversion that the range check
   uses, so the value that was checked and the value that is used as an index
   can never differ (e.g. for an argument wider than 32 bits).

   `ch` is evaluated twice: pass an expression without side effects. */
#define Py_UNICODE_ISSPACE(ch) \
    ((Py_UCS4)(ch) < 128U \
     ? _Py_ascii_whitespace[(Py_UCS4)(ch)] \
     : _PyUnicode_IsWhitespace(ch))
|
|
|
|
|
/* Case and line-break classification: each macro forwards its argument
   unchanged to the matching _PyUnicode_Is*() function. */
#define Py_UNICODE_ISLOWER(c)     _PyUnicode_IsLowercase(c)
#define Py_UNICODE_ISUPPER(c)     _PyUnicode_IsUppercase(c)
#define Py_UNICODE_ISTITLE(c)     _PyUnicode_IsTitlecase(c)
#define Py_UNICODE_ISLINEBREAK(c) _PyUnicode_IsLinebreak(c)

/* Case conversion: thin aliases for the _PyUnicode_To*case() functions. */
#define Py_UNICODE_TOLOWER(c) _PyUnicode_ToLowercase(c)
#define Py_UNICODE_TOUPPER(c) _PyUnicode_ToUppercase(c)
#define Py_UNICODE_TOTITLE(c) _PyUnicode_ToTitlecase(c)
|
|
|
|
|
/* Numeric and printable classification: aliases for the _PyUnicode_Is*()
   functions. */
#define Py_UNICODE_ISDECIMAL(c)   _PyUnicode_IsDecimalDigit(c)
#define Py_UNICODE_ISDIGIT(c)     _PyUnicode_IsDigit(c)
#define Py_UNICODE_ISNUMERIC(c)   _PyUnicode_IsNumeric(c)
#define Py_UNICODE_ISPRINTABLE(c) _PyUnicode_IsPrintable(c)

/* Numeric value lookup: aliases for the _PyUnicode_To*() functions. */
#define Py_UNICODE_TODECIMAL(c) _PyUnicode_ToDecimalDigit(c)
#define Py_UNICODE_TODIGIT(c)   _PyUnicode_ToDigit(c)
#define Py_UNICODE_TONUMERIC(c) _PyUnicode_ToNumeric(c)
|
|
|
|
|
/* Alphabetic test: alias for _PyUnicode_IsAlpha(). */
#define Py_UNICODE_ISALPHA(c) _PyUnicode_IsAlpha(c)

/* Alphanumeric test: true when any of the alpha, decimal, digit or numeric
   predicates accepts the code point.  The predicates are tried in that order
   and evaluation stops at the first match, so `c` may be evaluated up to four
   times: pass an expression without side effects. */
#define Py_UNICODE_ISALNUM(c)      \
    (Py_UNICODE_ISALPHA(c)         \
     || Py_UNICODE_ISDECIMAL(c)    \
     || Py_UNICODE_ISDIGIT(c)      \
     || Py_UNICODE_ISNUMERIC(c))
|
|
|
|
|
|
|
|
/* Range tests for UTF-16 surrogate code units:
     any surrogate   0xD800..0xDFFF
     high surrogate  0xD800..0xDBFF
     low surrogate   0xDC00..0xDFFF
   Each macro evaluates `ch` twice: pass an expression without side effects. */
#define Py_UNICODE_IS_SURROGATE(ch)      ((ch) >= 0xD800 && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) ((ch) >= 0xD800 && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch)  ((ch) >= 0xDC00 && (ch) <= 0xDFFF)
|
|
|
|
|
/* Combine a surrogate pair into one Py_UCS4 value: the low 10 bits of `high`
   become bits 10..19, the low 10 bits of `low` become bits 0..9, and 0x10000
   is added to the result. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)            \
    (0x10000 + ((((Py_UCS4)(high) & 0x03FF) << 10)       \
                | ((Py_UCS4)(low) & 0x03FF)))

/* High surrogate for `ch`: (ch >> 10) rebased onto 0xD800 after removing the
   0x10000 offset.  The argument is not range-checked. */
#define Py_UNICODE_HIGH_SURROGATE(ch) (((ch) >> 10) + (0xD800 - (0x10000 >> 10)))

/* Low surrogate for `ch`: its bottom 10 bits added to 0xDC00. */
#define Py_UNICODE_LOW_SURROGATE(ch) (((ch) & 0x3FF) + 0xDC00)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyObject_HEAD |
|
|
Py_ssize_t length; |
|
|
Py_hash_t hash; |
|
|
struct { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int interned:2; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int kind:3; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int compact:1; |
|
|
|
|
|
|
|
|
|
|
|
unsigned int ascii:1; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int ready:1; |
|
|
|
|
|
|
|
|
unsigned int :24; |
|
|
} state; |
|
|
wchar_t *wstr; |
|
|
} PyASCIIObject; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
PyASCIIObject _base; |
|
|
Py_ssize_t utf8_length; |
|
|
|
|
|
char *utf8; |
|
|
Py_ssize_t wstr_length; |
|
|
|
|
|
} PyCompactUnicodeObject; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
PyCompactUnicodeObject _base; |
|
|
union { |
|
|
void *any; |
|
|
Py_UCS1 *latin1; |
|
|
Py_UCS2 *ucs2; |
|
|
Py_UCS4 *ucs4; |
|
|
} data; |
|
|
} PyUnicodeObject; |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_CheckConsistency( |
|
|
PyObject *op, |
|
|
int check_content); |
|
|
|
|
|
|
|
|
/* Pointer casts to the three Unicode layouts.  Each one first asserts
   PyUnicode_Check() on its argument (active only when assert() is enabled) and
   then converts with _Py_CAST.  The argument is evaluated twice when
   assertions are enabled. */
#define _PyASCIIObject_CAST(obj) \
    (assert(PyUnicode_Check(obj)), _Py_CAST(PyASCIIObject*, (obj)))

#define _PyCompactUnicodeObject_CAST(obj) \
    (assert(PyUnicode_Check(obj)), _Py_CAST(PyCompactUnicodeObject*, (obj)))

#define _PyUnicodeObject_CAST(obj) \
    (assert(PyUnicode_Check(obj)), _Py_CAST(PyUnicodeObject*, (obj)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Interning state constants.
   NOTE(review): presumably the values stored in PyASCIIObject.state.interned
   and returned by PyUnicode_CHECK_INTERNED() -- confirm. */
#define SSTATE_NOT_INTERNED      0
#define SSTATE_INTERNED_MORTAL   1
#define SSTATE_INTERNED_IMMORTAL 2
|
|
|
|
|
|
|
|
/* Return the `interned` bit-field of a Unicode object's state. */
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
    const PyASCIIObject *base = _PyASCIIObject_CAST(op);
    return base->state.interned;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
/* Return the `ready` flag of a Unicode object's state (0 or 1). */
static inline unsigned int PyUnicode_IS_READY(PyObject *op) {
    const PyASCIIObject *base = _PyASCIIObject_CAST(op);
    return base->state.ready;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_IS_READY(op) PyUnicode_IS_READY(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Return the `ascii` flag of a Unicode object's state (0 or 1).
   Asserts that the object is ready. */
static inline unsigned int PyUnicode_IS_ASCII(PyObject *op) {
    const PyASCIIObject *base;

    assert(PyUnicode_IS_READY(op));
    base = _PyASCIIObject_CAST(op);
    return base->state.ascii;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_IS_ASCII(op) PyUnicode_IS_ASCII(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
/* Return the `compact` flag of a Unicode object's state (0 or 1). */
static inline unsigned int PyUnicode_IS_COMPACT(PyObject *op) {
    const PyASCIIObject *base = _PyASCIIObject_CAST(op);
    return base->state.compact;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_IS_COMPACT(op) PyUnicode_IS_COMPACT(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
/* Return 1 when both the `ascii` and `compact` flags are set, else 0.
   The `ascii` flag is read directly from the state, so unlike
   PyUnicode_IS_ASCII() this does not assert that the object is ready. */
static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) {
    if (!_PyASCIIObject_CAST(op)->state.ascii) {
        return 0;
    }
    return PyUnicode_IS_COMPACT(op) != 0;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_IS_COMPACT_ASCII(op) PyUnicode_IS_COMPACT_ASCII(_PyObject_CAST(op))
#endif
|
|
|
|
|
/* Storage kinds of a Unicode object.  PyUnicode_READ() and PyUnicode_WRITE()
   treat the 1/2/4-byte kinds as arrays of Py_UCS1, Py_UCS2 and Py_UCS4. */
enum PyUnicode_Kind {
    /* NOTE(review): presumably marks legacy strings that only carry a wchar_t
       buffer; _PyUnicodeWriter_PrepareKind() asserts it is never requested. */
    PyUnicode_WCHAR_KIND = 0,
    /* The numeric value of each remaining kind equals its element width. */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
|
|
|
|
|
|
|
|
/* Yield the `kind` bit-field of a Unicode object after asserting that the
   object is ready.  With assertions enabled the argument is evaluated more
   than once, so it must not have side effects. */
#define PyUnicode_KIND(obj) \
    (assert(PyUnicode_IS_READY(obj)), _PyASCIIObject_CAST(obj)->state.kind)
|
|
|
|
|
|
|
|
/* Return the address just past the object header of a compact string:
   past PyASCIIObject when the `ascii` flag is set, otherwise past
   PyCompactUnicodeObject. */
static inline void* _PyUnicode_COMPACT_DATA(PyObject *op) {
    void *payload;

    if (!PyUnicode_IS_ASCII(op)) {
        payload = _Py_STATIC_CAST(void*, (_PyCompactUnicodeObject_CAST(op) + 1));
    }
    else {
        payload = _Py_STATIC_CAST(void*, (_PyASCIIObject_CAST(op) + 1));
    }
    return payload;
}
|
|
|
|
|
/* Return the `data.any` pointer of a non-compact string.  Asserts that the
   object is not compact and that the pointer is not NULL. */
static inline void* _PyUnicode_NONCOMPACT_DATA(PyObject *op) {
    void *buffer;

    assert(!PyUnicode_IS_COMPACT(op));
    buffer = _PyUnicodeObject_CAST(op)->data.any;
    assert(buffer != NULL);
    return buffer;
}
|
|
|
|
|
/* Return a pointer to the character data of a Unicode object, dispatching on
   the `compact` flag to the compact or non-compact lookup. */
static inline void* PyUnicode_DATA(PyObject *op) {
    return PyUnicode_IS_COMPACT(op)
        ? _PyUnicode_COMPACT_DATA(op)
        : _PyUnicode_NONCOMPACT_DATA(op);
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Typed views of PyUnicode_DATA(): the same pointer cast to an array of
   Py_UCS1, Py_UCS2 or Py_UCS4.  No check is made that the object's kind
   matches the requested element type. */
#define PyUnicode_1BYTE_DATA(obj) _Py_STATIC_CAST(Py_UCS1*, PyUnicode_DATA(obj))
#define PyUnicode_2BYTE_DATA(obj) _Py_STATIC_CAST(Py_UCS2*, PyUnicode_DATA(obj))
#define PyUnicode_4BYTE_DATA(obj) _Py_STATIC_CAST(Py_UCS4*, PyUnicode_DATA(obj))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Return the `length` field of a Unicode object.  Asserts that the object is
   ready. */
static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) {
    const PyASCIIObject *base;

    assert(PyUnicode_IS_READY(op));
    base = _PyASCIIObject_CAST(op);
    return base->length;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static inline void PyUnicode_WRITE(int kind, void *data, |
|
|
Py_ssize_t index, Py_UCS4 value) |
|
|
{ |
|
|
if (kind == PyUnicode_1BYTE_KIND) { |
|
|
assert(value <= 0xffU); |
|
|
_Py_STATIC_CAST(Py_UCS1*, data)[index] = _Py_STATIC_CAST(Py_UCS1, value); |
|
|
} |
|
|
else if (kind == PyUnicode_2BYTE_KIND) { |
|
|
assert(value <= 0xffffU); |
|
|
_Py_STATIC_CAST(Py_UCS2*, data)[index] = _Py_STATIC_CAST(Py_UCS2, value); |
|
|
} |
|
|
else { |
|
|
assert(kind == PyUnicode_4BYTE_KIND); |
|
|
assert(value <= 0x10ffffU); |
|
|
_Py_STATIC_CAST(Py_UCS4*, data)[index] = value; |
|
|
} |
|
|
} |
|
|
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 |
|
|
#define PyUnicode_WRITE(kind, data, index, value) \ |
|
|
PyUnicode_WRITE(_Py_STATIC_CAST(int, kind), _Py_CAST(void*, data), \ |
|
|
(index), _Py_STATIC_CAST(Py_UCS4, value)) |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
/* Read the code point at element `index` of the buffer `data`.

   kind   selects the element width: PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND
          or PyUnicode_4BYTE_KIND (any other value is asserted against).
   data   start of the character buffer; elements are Py_UCS1, Py_UCS2 or
          Py_UCS4 according to `kind`.
   index  element index; must be non-negative.  The buffer length is not known
          here, so the upper bound is the caller's responsibility.

   Returns the element widened to Py_UCS4.  All checks are assert()s and
   vanish in builds that define NDEBUG. */
static inline Py_UCS4 PyUnicode_READ(int kind,
                                     const void *data, Py_ssize_t index)
{
    /* A negative index would read in front of the buffer. */
    assert(index >= 0);
    if (kind == PyUnicode_1BYTE_KIND) {
        return _Py_STATIC_CAST(const Py_UCS1*, data)[index];
    }
    if (kind == PyUnicode_2BYTE_KIND) {
        return _Py_STATIC_CAST(const Py_UCS2*, data)[index];
    }
    assert(kind == PyUnicode_4BYTE_KIND);
    return _Py_STATIC_CAST(const Py_UCS4*, data)[index];
}
/* Outside the limited API 3.11+, convert the arguments to the parameter types
   so that callers may pass enum kinds and typed data pointers. */
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
#define PyUnicode_READ(kind, data, index) \
    PyUnicode_READ(_Py_STATIC_CAST(int, kind), \
                   _Py_STATIC_CAST(const void*, data), \
                   (index))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Read the code point at position `index` of the Unicode object `unicode`.

   The object must be ready (asserted).  The index is asserted to lie in
   [0, PyUnicode_GET_LENGTH(unicode)].
   NOTE(review): index == length is deliberately accepted on the assumption
   that the buffer carries a terminating element there -- confirm.

   Returns the element widened to Py_UCS4.  All checks are assert()s and
   vanish in builds that define NDEBUG. */
static inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index)
{
    int kind;

    assert(PyUnicode_IS_READY(unicode));
    /* Reject indexes that would read outside the character buffer. */
    assert(index >= 0);
    assert(index <= PyUnicode_GET_LENGTH(unicode));

    kind = PyUnicode_KIND(unicode);
    if (kind == PyUnicode_1BYTE_KIND) {
        return PyUnicode_1BYTE_DATA(unicode)[index];
    }
    if (kind == PyUnicode_2BYTE_KIND) {
        return PyUnicode_2BYTE_DATA(unicode)[index];
    }
    assert(kind == PyUnicode_4BYTE_KIND);
    return PyUnicode_4BYTE_DATA(unicode)[index];
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   object argument to PyObject* first. */
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
#  define PyUnicode_READ_CHAR(unicode, index) \
       PyUnicode_READ_CHAR(_PyObject_CAST(unicode), (index))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Return an upper bound for the code points a string can hold, derived only
   from its flags: 0x7f when the `ascii` flag is set, otherwise 0xff, 0xffff
   or 0x10ffff for the 1-, 2- and 4-byte kinds.  Asserts that the object is
   ready. */
static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op)
{
    int kind;

    assert(PyUnicode_IS_READY(op));
    if (PyUnicode_IS_ASCII(op)) {
        return 0x7fU;
    }

    kind = PyUnicode_KIND(op);
    switch (kind) {
    case PyUnicode_1BYTE_KIND:
        return 0xffU;
    case PyUnicode_2BYTE_KIND:
        return 0xffffU;
    default:
        assert(kind == PyUnicode_4BYTE_KIND);
        return 0x10ffffU;
    }
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_MAX_CHAR_VALUE(op) \
       PyUnicode_MAX_CHAR_VALUE(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Exported constructor taking a size and a maximum code point.
   NOTE(review): presumably allocates a string of `size` characters able to
   hold code points up to `maxchar` -- confirm against the C-API docs. */
PyAPI_FUNC(PyObject*) PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_Ready( |
|
|
PyObject *unicode |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Ensure a Unicode object is ready: return 0 immediately when the `ready`
   flag is already set, otherwise return the result of _PyUnicode_Ready(). */
static inline int PyUnicode_READY(PyObject *op)
{
    return PyUnicode_IS_READY(op) ? 0 : _PyUnicode_Ready(op);
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
/* Exported function taking a Unicode object and returning an object pointer.
   NOTE(review): presumably returns a new copy of `unicode` -- confirm. */
PyAPI_FUNC(PyObject*) _PyUnicode_Copy(PyObject *unicode);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( |
|
|
PyObject *to, |
|
|
Py_ssize_t to_start, |
|
|
PyObject *from, |
|
|
Py_ssize_t from_start, |
|
|
Py_ssize_t how_many |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( |
|
|
PyObject *to, |
|
|
Py_ssize_t to_start, |
|
|
PyObject *from, |
|
|
Py_ssize_t from_start, |
|
|
Py_ssize_t how_many |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( |
|
|
PyObject *unicode, |
|
|
Py_ssize_t start, |
|
|
Py_ssize_t length, |
|
|
Py_UCS4 fill_char |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(void) _PyUnicode_FastFill( |
|
|
PyObject *unicode, |
|
|
Py_ssize_t start, |
|
|
Py_ssize_t length, |
|
|
Py_UCS4 fill_char |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( |
|
|
int kind, |
|
|
const void *buffer, |
|
|
Py_ssize_t size); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( |
|
|
const char *buffer, |
|
|
Py_ssize_t size); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( |
|
|
PyObject *unicode, |
|
|
Py_ssize_t start, |
|
|
Py_ssize_t end); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( |
|
|
const Py_UNICODE *u, |
|
|
Py_ssize_t size |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( |
|
|
PyObject *unicode |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( |
|
|
PyObject *unicode |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( |
|
|
PyObject *unicode, |
|
|
Py_ssize_t *size |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Return the length associated with the wchar_t representation: the `length`
   field for compact ASCII objects, the `wstr_length` field otherwise.
   Deprecated since 3.3. */
Py_DEPRECATED(3.3)
static inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op)
{
    if (!PyUnicode_IS_COMPACT_ASCII(op)) {
        return _PyCompactUnicodeObject_CAST(op)->wstr_length;
    }
    return _PyASCIIObject_CAST(op)->length;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_WSTR_LENGTH(op) PyUnicode_WSTR_LENGTH(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Return PyUnicode_WSTR_LENGTH() of the object, first calling
   PyUnicode_AsUnicode() when the `wstr` field is still NULL.  The result of
   that call is discarded; only an assert() checks that `wstr` was set.
   Deprecated since 3.3; the deprecation warnings for the helpers it uses are
   silenced locally. */
Py_DEPRECATED(3.3)
static inline Py_ssize_t PyUnicode_GET_SIZE(PyObject *op)
{
    Py_ssize_t size;

    _Py_COMP_DIAG_PUSH
    _Py_COMP_DIAG_IGNORE_DEPR_DECLS
    if (_PyASCIIObject_CAST(op)->wstr == _Py_NULL) {
        (void)PyUnicode_AsUnicode(op);
        assert(_PyASCIIObject_CAST(op)->wstr != _Py_NULL);
    }
    size = PyUnicode_WSTR_LENGTH(op);
    _Py_COMP_DIAG_POP
    return size;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(_PyObject_CAST(op))
#endif
|
|
|
|
|
/* Return PyUnicode_GET_SIZE(op) multiplied by Py_UNICODE_SIZE.
   Deprecated since 3.3; the deprecation warning for PyUnicode_GET_SIZE() is
   silenced locally. */
Py_DEPRECATED(3.3)
static inline Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *op)
{
    Py_ssize_t nbytes;

    _Py_COMP_DIAG_PUSH
    _Py_COMP_DIAG_IGNORE_DEPR_DECLS
    nbytes = PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE;
    _Py_COMP_DIAG_POP
    return nbytes;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_GET_DATA_SIZE(op) PyUnicode_GET_DATA_SIZE(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Return the object's `wstr` pointer when it is non-NULL; otherwise return
   the result of PyUnicode_AsUnicode().  Deprecated since 3.3; the deprecation
   warning for PyUnicode_AsUnicode() is silenced locally. */
Py_DEPRECATED(3.3)
static inline Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *op)
{
    wchar_t *cached = _PyASCIIObject_CAST(op)->wstr;

    _Py_COMP_DIAG_PUSH
    _Py_COMP_DIAG_IGNORE_DEPR_DECLS
    if (cached == _Py_NULL) {
        cached = PyUnicode_AsUnicode(op);
    }
    _Py_COMP_DIAG_POP
    return cached;
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(_PyObject_CAST(op))
#endif
|
|
|
|
|
/* Return the PyUnicode_AS_UNICODE() pointer reinterpreted as const char*.
   The conversion goes through const void* so that it can be spelled with
   static casts.  Deprecated since 3.3; the deprecation warning for
   PyUnicode_AS_UNICODE() is silenced locally. */
Py_DEPRECATED(3.3)
static inline const char* PyUnicode_AS_DATA(PyObject *op)
{
    const void *raw;

    _Py_COMP_DIAG_PUSH
    _Py_COMP_DIAG_IGNORE_DEPR_DECLS
    raw = PyUnicode_AS_UNICODE(op);
    _Py_COMP_DIAG_POP
    return _Py_STATIC_CAST(const char*, raw);
}
/* Outside the limited API 3.11+, accept any pointer type by casting the
   argument to PyObject* first. */
#if !(defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030b0000)
#  define PyUnicode_AS_DATA(op) PyUnicode_AS_DATA(_PyObject_CAST(op))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
PyObject *buffer; |
|
|
void *data; |
|
|
enum PyUnicode_Kind kind; |
|
|
Py_UCS4 maxchar; |
|
|
Py_ssize_t size; |
|
|
Py_ssize_t pos; |
|
|
|
|
|
|
|
|
Py_ssize_t min_length; |
|
|
|
|
|
|
|
|
Py_UCS4 min_char; |
|
|
|
|
|
|
|
|
unsigned char overallocate; |
|
|
|
|
|
|
|
|
|
|
|
unsigned char readonly; |
|
|
} _PyUnicodeWriter ; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(void) |
|
|
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Make sure the writer can take LENGTH more characters with code points up
   to MAXCHAR.  Evaluates to 0 without calling anything when either
     - MAXCHAR <= writer->maxchar and LENGTH <= writer->size - writer->pos, or
     - LENGTH == 0;
   otherwise evaluates to the result of _PyUnicodeWriter_PrepareInternal().
   The arguments are evaluated more than once. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)                 \
    ((((MAXCHAR) <= (WRITER)->maxchar                                     \
       && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                     \
      || (LENGTH) == 0)                                                   \
     ? 0                                                                  \
     : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) |
|
|
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, |
|
|
Py_ssize_t length, Py_UCS4 maxchar); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Make sure the writer's storage kind is at least KIND.  Asserts that KIND is
   not PyUnicode_WCHAR_KIND.  Evaluates to 0 when KIND <= writer->kind,
   otherwise to the result of _PyUnicodeWriter_PrepareKindInternal().
   The arguments are evaluated more than once. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
    (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
     (KIND) > (WRITER)->kind                                          \
     ? _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))         \
     : 0)
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) |
|
|
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, |
|
|
enum PyUnicode_Kind kind); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) |
|
|
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) |
|
|
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, |
|
|
PyObject *str |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) |
|
|
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, |
|
|
PyObject *str, |
|
|
Py_ssize_t start, |
|
|
Py_ssize_t end |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) |
|
|
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, |
|
|
const char *str, |
|
|
Py_ssize_t len |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) |
|
|
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, |
|
|
const char *str, |
|
|
Py_ssize_t len |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(PyObject *) |
|
|
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); |
|
|
|
|
|
|
|
|
PyAPI_FUNC(void) |
|
|
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( |
|
|
_PyUnicodeWriter *writer, |
|
|
PyObject *obj, |
|
|
PyObject *format_spec, |
|
|
Py_ssize_t start, |
|
|
Py_ssize_t end); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); |
|
|
|
|
|
#define _PyUnicode_AsString PyUnicode_AsUTF8 |
|
|
|
|
|
|
|
|
|
|
|
/* Private encoders.  Each takes a Unicode object plus an `errors` string and
   returns an object pointer.
   NOTE(review): presumably a bytes object, or NULL with an exception set on
   failure, with `errors` naming the codec error handler -- confirm. */

/* NOTE(review): the two int flags presumably control base64 encoding of
   "Set O" characters and of whitespace -- confirm. */
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
    PyObject *unicode, int base64SetO, int base64WhiteSpace,
    const char *errors);

PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
    PyObject *unicode, const char *errors);

/* NOTE(review): `byteorder` presumably selects little/native/big endian via
   its sign -- confirm. */
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
    PyObject *object, const char *errors, int byteorder);

/* NOTE(review): `byteorder` presumably has the same meaning as for the
   UTF-32 encoder -- confirm. */
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
    PyObject* unicode, const char *errors, int byteorder);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Private escape decoders.  Each takes a char buffer of `length` bytes, an
   `errors` string and a `consumed` out-parameter, and returns an object
   pointer.
   NOTE(review): `consumed` presumably receives the number of input bytes
   that were decoded -- confirm. */

PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
    const char *string, Py_ssize_t length, const char *errors,
    Py_ssize_t *consumed);

/* NOTE(review): `first_invalid_escape` presumably receives a pointer to the
   first unrecognised escape sequence, if any -- confirm. */
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
    const char *string, Py_ssize_t length, const char *errors,
    Py_ssize_t *consumed, const char **first_invalid_escape);

PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
    const char *string, Py_ssize_t length, const char *errors,
    Py_ssize_t *consumed);
|
|
|
|
|
|
|
|
|
|
|
/* Private single-byte encoders.  Each takes a Unicode object and an `errors`
   string and returns an object pointer.
   NOTE(review): presumably a bytes object, or NULL with an exception set on
   failure -- confirm. */

PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
    PyObject* unicode, const char* errors);

PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
    PyObject* unicode, const char* errors);

/* NOTE(review): `mapping` presumably supplies the character-to-byte table --
   confirm. */
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
    PyObject *unicode, PyObject *mapping, const char *errors);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( |
|
|
PyObject *unicode |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray( |
|
|
PyObject *separator, |
|
|
PyObject *const *items, |
|
|
Py_ssize_t seqlen |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId( |
|
|
PyObject *left, |
|
|
_Py_Identifier *right |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString( |
|
|
PyObject *left, |
|
|
const char *right |
|
|
); |
|
|
|
|
|
|
|
|
PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( |
|
|
PyObject *self, |
|
|
int striptype, |
|
|
PyObject *sepobj |
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( |
|
|
_PyUnicodeWriter *writer, |
|
|
Py_ssize_t n_buffer, |
|
|
PyObject *digits, |
|
|
Py_ssize_t d_pos, |
|
|
Py_ssize_t n_digits, |
|
|
Py_ssize_t min_width, |
|
|
const char *grouping, |
|
|
PyObject *thousands_sep, |
|
|
Py_UCS4 *maxchar); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsLowercase( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsUppercase( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsTitlecase( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsXidStart( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsXidContinue( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsWhitespace( |
|
|
const Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsLinebreak( |
|
|
const Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_ToLowerFull( |
|
|
Py_UCS4 ch, |
|
|
Py_UCS4 *res |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_ToTitleFull( |
|
|
Py_UCS4 ch, |
|
|
Py_UCS4 *res |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_ToUpperFull( |
|
|
Py_UCS4 ch, |
|
|
Py_UCS4 *res |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_ToFoldedFull( |
|
|
Py_UCS4 ch, |
|
|
Py_UCS4 *res |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsCased( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_ToDigit( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(double) _PyUnicode_ToNumeric( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsDigit( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsNumeric( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsPrintable( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_IsAlpha( |
|
|
Py_UCS4 ch |
|
|
); |
|
|
|
|
|
PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); |
|
|
|
|
|
|
|
|
PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); |
|
|
|
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); |
|
|
|
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *); |
|
|
|
|
|
PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *); |
|
|
PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); |
|
|
|
|
|
PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *); |
|
|
|