indiaai-hackathon
/
datalab
/web
/node_modules
/node-pty
/deps
/winpty
/src
/agent
/UnicodeEncoding.h
// Copyright (c) 2015 Ryan Prichard
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// Store the UTF-8 encoding of the code point `code` in `out` and return the
// number of bytes written (1 to 4).  Return 0 without writing anything when
// `code` has no UTF-8 encoding: the UTF-16 surrogate range 0xD800-0xDFFF and
// every value of 0x110000 or more.  `out` must have room for at least 4 bytes.
static inline int encodeUtf8(char *out, uint32_t code) {
    // Reject everything that cannot be encoded before touching the buffer.
    if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000) {
        return 0;
    }

    // Single byte: the value is stored unchanged.
    if (code < 0x80) {
        out[0] = code;
        return 1;
    }

    // Choose the sequence length and the tag bits of the lead byte.  The
    // four-byte form is the default; shorter forms override it.
    int length = 4;
    uint32_t leadTag = 0xF0;
    if (code < 0x800) {
        length = 2;
        leadTag = 0xC0;
    } else if (code < 0x10000) {
        length = 3;
        leadTag = 0xE0;
    }

    // Fill the continuation bytes from last to first; each one carries the
    // next six low-order bits, tagged with 0x80.
    for (int i = length - 1; i > 0; --i) {
        out[i] = (code & 0x3F) | 0x80;
        code >>= 6;
    }

    // What is left after the shifts is small enough to sit below the lead
    // tag for the length chosen above.
    out[0] = code | leadTag;
    return length;
}
// Store the UTF-16 encoding of the code point `code` in `out` and return the
// number of elements written: 1 for values below 0x10000, 2 (a surrogate
// pair) for values up to 0x10FFFF.  Return 0 without writing anything when
// `code` lies in the surrogate range 0xD800-0xDFFF or is 0x110000 or more.
// `out` must have room for the elements written.
static inline int encodeUtf16(wchar_t *out, uint32_t code) {
    // Surrogate values are reserved for pairs and cannot be encoded
    // themselves; values of 0x110000 and above are not code points.
    if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000) {
        return 0;
    }

    // Values below 0x10000 are stored as a single element.
    if (code < 0x10000) {
        out[0] = code;
        return 1;
    }

    // Everything else is split into a surrogate pair: the upper ten bits of
    // the 20-bit offset go into the first element, the lower ten bits into
    // the second.
    const uint32_t offset = code - 0x10000;
    out[0] = 0xD800 | (offset >> 10);
    out[1] = 0xDC00 | (offset & 0x3FF);
    return 2;
}
// Report how many bytes a UTF-8 sequence occupies, judged only by its first
// byte.  Returns 1 to 4 for a valid lead byte and 0 when the byte cannot
// start a sequence (a continuation byte 0x80-0xBF, or 0xF8 and above).
static inline int utf8CharLength(char firstByte) {
    // Work on the unsigned value so the ranges read naturally regardless of
    // whether plain char is signed.
    const unsigned char lead = (unsigned char)firstByte;

    if (lead < 0x80) {
        return 1;   // 0xxxxxxx
    }
    if (lead < 0xC0) {
        return 0;   // 10xxxxxx: malformed, a continuation byte
    }
    if (lead < 0xE0) {
        return 2;   // 110xxxxx
    }
    if (lead < 0xF0) {
        return 3;   // 1110xxxx
    }
    if (lead < 0xF8) {
        return 4;   // 11110xxx
    }
    return 0;       // 11111xxx: malformed
}
| // The pointer must point to 1-4 bytes, as indicated by the first byte. | |
| // Returns -1 on decoding error. | |
| static inline uint32_t decodeUtf8(const char *in) { | |
| const uint32_t kInvalid = static_cast<uint32_t>(-1); | |
| switch (utf8CharLength(in[0])) { | |
| case 1: { | |
| return in[0]; | |
| } | |
| case 2: { | |
| if ((in[1] & 0xC0) != 0x80) { | |
| return kInvalid; | |
| } | |
| uint32_t tmp = 0; | |
| tmp = (in[0] & 0x1F) << 6; | |
| tmp |= (in[1] & 0x3F); | |
| return tmp <= 0x7F ? kInvalid : tmp; | |
| } | |
| case 3: { | |
| if ((in[1] & 0xC0) != 0x80 || | |
| (in[2] & 0xC0) != 0x80) { | |
| return kInvalid; | |
| } | |
| uint32_t tmp = 0; | |
| tmp = (in[0] & 0x0F) << 12; | |
| tmp |= (in[1] & 0x3F) << 6; | |
| tmp |= (in[2] & 0x3F); | |
| if (tmp <= 0x07FF || (tmp >= 0xD800 && tmp <= 0xDFFF)) { | |
| return kInvalid; | |
| } else { | |
| return tmp; | |
| } | |
| } | |
| case 4: { | |
| if ((in[1] & 0xC0) != 0x80 || | |
| (in[2] & 0xC0) != 0x80 || | |
| (in[3] & 0xC0) != 0x80) { | |
| return kInvalid; | |
| } | |
| uint32_t tmp = 0; | |
| tmp = (in[0] & 0x07) << 18; | |
| tmp |= (in[1] & 0x3F) << 12; | |
| tmp |= (in[2] & 0x3F) << 6; | |
| tmp |= (in[3] & 0x3F); | |
| if (tmp <= 0xFFFF || tmp > 0x10FFFF) { | |
| return kInvalid; | |
| } else { | |
| return tmp; | |
| } | |
| } | |
| default: { | |
| return kInvalid; | |
| } | |
| } | |
| } | |
// Combine a UTF-16 surrogate pair into the code point it encodes.  `ch1` is
// the first element of the pair (0xD800-0xDBFF) and `ch2` the second
// (0xDC00-0xDFFF); the arguments are not validated.
static inline uint32_t decodeSurrogatePair(wchar_t ch1, wchar_t ch2) {
    // Each surrogate contributes ten bits of the 20-bit offset above 0x10000.
    const uint32_t upperBits = ch1 - 0xD800;
    const uint32_t lowerBits = ch2 - 0xDC00;
    return (upperBits << 10) + lowerBits + 0x10000;
}