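// NOTE(review): the three lines below appear to be repository-page residue
// (uploader avatar label, commit message, short commit hash) rather than part
// of the module; they are kept as comments so the file parses as JavaScript.
// rnnandi's picture
// Add all files to convert gemma3 model to onnx
// ca97aa9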
import { GemmaTokenizer } from "../../../src/tokenizers.js";
import { BASE_TEST_STRINGS, LLAMA_TEST_STRINGS } from "../test_strings.js";
/**
 * Re-exports the imported `GemmaTokenizer` under the generic name
 * `TOKENIZER_CLASS`.
 * NOTE(review): presumably a shared test runner reads this export to decide
 * which tokenizer class the fixtures in `TEST_CONFIG` apply to — confirm
 * against the runner.
 */
export const TOKENIZER_CLASS = GemmaTokenizer;
export const TEST_CONFIG = {
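// NOTE(review): this comment originally read "Xenova/gemma-tokenizer", but the key below is "Xenova/gemma2-tokenizer" — confirm which model id these fixtures were generated from.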
"Xenova/gemma2-tokenizer": {
// SIMPLE: a plain sentence. Every token after the first carries a leading
// "\u2581", which appears as a space in `decoded`. `ids` has one more element
// than `tokens`: a leading 2 with no counterpart in `tokens`, matched by the
// "<bos>" prefix of `decoded`.
SIMPLE: {
text: BASE_TEST_STRINGS.SIMPLE,
tokens: ["How", "\u2581are", "\u2581you", "\u2581doing", "?"],
ids: [2, 2299, 708, 692, 3900, 235336],
decoded: "<bos>How are you doing?",
},
// SIMPLE_WITH_PUNCTUATION: a contraction. The apostrophe is its own token
// ("'", id 235303) followed by "ve"; `decoded` joins them back without spaces.
SIMPLE_WITH_PUNCTUATION: {
text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
tokens: ["You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
ids: [2, 2045, 1412, 235303, 524, 3015, 736],
decoded: "<bos>You should've done this",
},
// NUMBERS: every digit is a separate token (e.g. "10" becomes "1", "0"), and
// the gaps between numbers are bare "\u2581" tokens (id 235248).
NUMBERS: {
text: BASE_TEST_STRINGS.NUMBERS,
tokens: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "\u2581", "0", "\u2581", "1", "\u2581", "2", "\u2581", "3", "\u2581", "4", "\u2581", "5", "\u2581", "6", "\u2581", "7", "\u2581", "8", "\u2581", "9", "\u2581", "1", "0", "\u2581", "1", "0", "0", "\u2581", "1", "0", "0", "0"],
ids: [2, 235276, 235274, 235284, 235304, 235310, 235308, 235318, 235324, 235321, 235315, 235248, 235276, 235248, 235274, 235248, 235284, 235248, 235304, 235248, 235310, 235248, 235308, 235248, 235318, 235248, 235324, 235248, 235321, 235248, 235315, 235248, 235274, 235276, 235248, 235274, 235276, 235276, 235248, 235274, 235276, 235276, 235276],
decoded: "<bos>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
},
// TEXT_WITH_NUMBERS: a number embedded in prose. The year is emitted as a bare
// "\u2581" token followed by one token per digit.
TEXT_WITH_NUMBERS: {
text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
tokens: ["The", "\u2581company", "\u2581was", "\u2581founded", "\u2581in", "\u2581", "2", "0", "1", "6", "."],
ids: [2, 651, 3277, 729, 18942, 575, 235248, 235284, 235276, 235274, 235318, 235265],
decoded: "<bos>The company was founded in 2016.",
},
// PUNCTUATION: dense punctuation. "\n" is its own token (id 108), and some
// punctuation pairs are single tokens ("?'" and "''"). The space before "!!"
// is preserved in `decoded`.
PUNCTUATION: {
text: BASE_TEST_STRINGS.PUNCTUATION,
tokens: ["A", "\n", "'", "ll", "\u2581!!", "to", "?'", "d", "''", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
ids: [2, 235280, 108, 235303, 529, 9063, 511, 18016, 235258, 3404, 235258, 576, 235269, 798, 235303, 235251, 235265],
decoded: "<bos>A\n'll !!to?'d''d of, can't.",
},
// PYTHON_CODE: a short Python snippet. "():" is one token, while the newline
// and the tab are separate tokens (ids 108 and 226).
PYTHON_CODE: {
text: BASE_TEST_STRINGS.PYTHON_CODE,
tokens: ["def", "\u2581main", "():", "\n", "\t", "pass"],
ids: [2, 1293, 1872, 4409, 108, 226, 3095],
decoded: "<bos>def main():\n\tpass",
},
// JAVASCRIPT_CODE: a short JavaScript snippet. "toString" and "();" map to the
// same ids (7114, 821) both after "." and at the start of the second line.
JAVASCRIPT_CODE: {
text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
tokens: ["let", "\u2581a", "\u2581=", "\u2581obj", ".", "toString", "();", "\n", "toString", "();"],
ids: [2, 1243, 476, 589, 6555, 235265, 7114, 821, 108, 7114, 821],
decoded: "<bos>let a = obj.toString();\ntoString();",
},
// NEWLINES: input taken from LLAMA_TEST_STRINGS. Each "\n" is a separate token
// (id 108) and "####" is a single token.
NEWLINES: {
text: LLAMA_TEST_STRINGS.NEWLINES,
tokens: ["ax", "\n", "####", "\n", "boo"],
ids: [2, 1247, 108, 3308, 108, 31931],
decoded: "<bos>ax\n####\nboo",
},
// BASIC: mixed case with an accented character. The accent is kept
// ("\u00e9d" is one token) and no token carries a "\u2581" prefix, so
// `decoded` contains no spaces.
BASIC: {
text: BASE_TEST_STRINGS.BASIC,
tokens: ["UN", "want", "\u00e9d", ",", "running"],
ids: [2, 2019, 29007, 45346, 235269, 23655],
decoded: "<bos>UNwant\u00e9d,running",
},
// CONTROL_TOKENS: control / replacement characters. The NUL character is
// represented by the byte token "<0x00>" (id 217) and decodes to "\u0000";
// "\ufffd" has its own token and is kept as-is in `decoded`.
CONTROL_TOKENS: {
text: BASE_TEST_STRINGS.CONTROL_TOKENS,
tokens: ["1", "<0x00>", "2", "\ufffd", "3"],
ids: [2, 235274, 217, 235284, 236193, 235304],
decoded: "<bos>1\u00002\ufffd3",
},
// HELLO_WORLD_TITLECASE: two capitalised words; case is preserved in both
// `tokens` and `decoded`.
HELLO_WORLD_TITLECASE: {
text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
tokens: ["Hello", "\u2581World"],
ids: [2, 4521, 3855],
decoded: "<bos>Hello World",
},
// HELLO_WORLD_LOWERCASE: two lower-case words; case is preserved in both
// `tokens` and `decoded`.
HELLO_WORLD_LOWERCASE: {
text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
tokens: ["hello", "\u2581world"],
ids: [2, 17534, 2134],
decoded: "<bos>hello world",
},
// CHINESE_ONLY: six CJK characters. The first three form a single token; the
// remaining three are one token each. No "\u2581" tokens are produced.
CHINESE_ONLY: {
text: BASE_TEST_STRINGS.CHINESE_ONLY,
tokens: ["\u751f\u6d3b\u7684", "\u771f", "\u8c1b", "\u662f"],
ids: [2, 122182, 235710, 245467, 235427],
decoded: "<bos>\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
},
LEADING_SPACE: {
text: BASE_TEST_STRINGS.LEADING_SPACE,
tokens: ["\u2581\u2581\u2581", "leading", "\u2581space"],
ids: [2, 140, 26650, 3641],
decoded: "<bos> leading space",
},
TRAILING_SPACE: {
text: BASE_TEST_STRINGS.TRAILING_SPACE,
tokens: ["trailing", "\u2581space", "\u2581\u2581\u2581"],
ids: [2, 100504, 3641, 140],
decoded: "<bos>trailing space ",
},
DOUBLE_SPACE: {
text: BASE_TEST_STRINGS.DOUBLE_SPACE,
tokens: ["Hi", "\u2581\u2581", "Hello"],
ids: [2, 2151, 139, 4521],
decoded: "<bos>Hi Hello",
},
// CURRENCY: currency symbols followed by digits. Most symbols are fused with
// the preceding "\u2581" into one token; "\u20a3" and "\u20b1" are instead
// preceded by a bare "\u2581" token (id 235248). Every gap is a single space
// in `decoded`.
CURRENCY: {
text: BASE_TEST_STRINGS.CURRENCY,
tokens: ["test", "\u2581$", "1", "\u2581R", "2", "\u2581#", "3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
ids: [2, 2195, 697, 235274, 625, 235284, 1700, 235304, 8296, 235310, 5955, 235308, 74393, 235318, 235248, 252058, 235324, 56712, 235321, 235248, 243132, 235315, 2121],
decoded: "<bos>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
},
// CURRENCY_WITH_DECIMALS: a price inside a sentence. "$" is fused with its
// "\u2581" prefix; the amount is emitted as "1", ".", "0", "0".
CURRENCY_WITH_DECIMALS: {
text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
tokens: ["I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$", "1", ".", "0", "0", "\u2581at", "\u2581the", "\u2581store", "."],
ids: [2, 235285, 8989, 671, 15491, 604, 697, 235274, 235265, 235276, 235276, 696, 573, 4659, 235265],
decoded: "<bos>I bought an apple for $1.00 at the store.",
},
ELLIPSIS: {
text: BASE_TEST_STRINGS.ELLIPSIS,
tokens: ["you", "\u2026", "\u2581\u2581"],
ids: [2, 4747, 235417, 139],
decoded: "<bos>you\u2026 ",
},
// TEXT_WITH_ESCAPE_CHARACTERS: "\u2026" followed by two "\u00a0" characters.
// The pair "\u00a0\u00a0" is one token (id 25445) and is kept unchanged in
// `decoded` (it is not turned into "\u2581" or a plain space).
TEXT_WITH_ESCAPE_CHARACTERS: {
text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
tokens: ["you", "\u2026", "\u00a0\u00a0"],
ids: [2, 4747, 235417, 25445],
decoded: "<bos>you\u2026\u00a0\u00a0",
},
// TEXT_WITH_ESCAPE_CHARACTERS_2: the three-token pattern "you", "\u2026",
// "\u00a0\u00a0" (ids 4747, 235417, 25445) repeated twice back to back.
TEXT_WITH_ESCAPE_CHARACTERS_2: {
text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
tokens: ["you", "\u2026", "\u00a0\u00a0", "you", "\u2026", "\u00a0\u00a0"],
ids: [2, 4747, 235417, 25445, 4747, 235417, 25445],
decoded: "<bos>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
},
// TILDE_NORMALIZATION: "\uff5e" (fullwidth tilde) appears twice. Both times it
// is fused with its "\u2581" prefix into the same token (id 134012), and it is
// kept as "\uff5e" in `decoded`.
TILDE_NORMALIZATION: {
text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
tokens: ["weird", "\u2581\uff5e", "\u2581edge", "\u2581\uff5e", "\u2581case"],
ids: [2, 102422, 134012, 8541, 134012, 2270],
decoded: "<bos>weird \uff5e edge \uff5e case",
},
SPIECE_UNDERSCORE: {
text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
tokens: ["\u2581This", "\u2581\u2581", "is", "\u2581\u2581", "a", "\u2581\u2581", "test", "\u2581\u2581", "."],
ids: [2, 1417, 139, 502, 139, 235250, 139, 2195, 139, 235265],
decoded: "<bos> This is a test .",
},
// POPULAR_EMOJIS: a space-separated list of emojis. Most emojis are fused with
// the preceding "\u2581" into one token; a few are instead preceded by a bare
// "\u2581" token (id 235248). Every gap is a single space in `decoded`.
POPULAR_EMOJIS: {
text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
tokens: ["\ud83d\ude02", "\u2581\ud83d\udc4d", "\u2581\ud83e\udd23", "\u2581\ud83d\ude0d", "\u2581\ud83d\ude2d", "\u2581\ud83c\udf89", "\u2581\ud83d\ude4f", "\u2581\ud83d\ude0a", "\u2581\ud83d\udd25", "\u2581\ud83d\ude01", "\u2581\ud83d\ude05", "\u2581\ud83e\udd17", "\u2581\ud83d\ude06", "\u2581\ud83d\udc4f", "\u2581\u2764\ufe0f", "\u2581\ud83d\udc9c", "\u2581\ud83d\udc9a", "\u2581\ud83d\udc97", "\u2581\ud83d\udc99", "\u2581\ud83d\udda4", "\u2581\ud83d\ude0e", "\u2581\ud83d\udc4c", "\u2581\ud83e\udd73", "\u2581\ud83d\udcaa", "\u2581\u2728", "\u2581\ud83d\udc49", "\u2581\ud83d\udc40", "\u2581\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581\ud83d\ude4c", "\u2581\ud83d\udc80", "\u2581\ud83d\udc47", "\u2581\ud83d\udc4b", "\u2581\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
ids: [2, 236471, 38104, 55937, 46434, 55605, 160588, 68226, 44416, 72373, 70636, 75298, 156808, 120433, 104492, 35373, 131674, 191384, 204903, 146773, 166620, 87949, 83860, 211978, 142816, 64726, 166368, 108892, 174882, 235248, 242431, 235248, 241259, 134540, 106918, 154601, 169692, 92641, 235248, 241227, 235248, 241971, 233958, 235248, 241034],
decoded: "<bos>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
},
// MULTIBYTE_EMOJIS: emoji sequences built from several code points. The
// sequences are split into multiple tokens: "\u200d" joiners and modifier
// code points such as "\ufe0f" and "\ud83c\udffb" mostly appear as their own
// tokens (some are fused, e.g. "\u200d\u2642\ufe0f"). `decoded` reassembles
// every sequence unchanged, with a single space between sequences.
MULTIBYTE_EMOJIS: {
text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
tokens: ["\u2728", "\u2581\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", "\ud83d\udc71", "\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u200d\u2642\ufe0f", "\u2581", "\ud83e\uddd9", "\ud83c\udffb", "\u200d\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u200d", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u200d", "\ud83e\udd1d", "\u200d", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u200d", "\u2764", "\u200d", "\ud83d\udc8b", "\u200d", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u200d", "\ud83d\udc69", "\u200d", "\ud83d\udc67", "\u200d", "\ud83d\udc66", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u200d", "\ud83e\udd1d", "\u200d", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83c\udff4", "\udb40\udc67", "\udb40\udc62", "\udb40\udc65", "\udb40\udc6e", "\udb40\udc67", "\udb40\udc7f", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u200d", "\u2764\ufe0f", "\u200d", "\ud83d\udc8b", "\u200d", "\ud83d\udc68", "\ud83c\udffc"],
ids: [2, 236309, 156808, 235248, 241666, 235969, 235248, 247216, 237933, 235248, 246522, 68399, 235248, 246422, 237933, 63233, 235248, 241568, 237933, 235879, 244448, 235248, 243634, 235879, 241668, 235879, 243634, 235248, 241355, 235879, 236457, 235879, 240887, 235879, 241568, 235248, 241355, 235879, 241355, 235879, 244355, 235879, 244670, 235248, 243634, 237933, 235879, 241668, 235879, 243634, 237933, 235248, 244443, 246738, 247704, 250142, 250123, 246738, 247662, 235248, 241568, 237933, 235879, 16176, 235879, 240887, 235879, 241568, 238683],
decoded: "<bos>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
},
// BPE_SCORES_PRIORITY_1: input taken from LLAMA_TEST_STRINGS. Without a
// leading "\u2581" the word is split into two tokens, "grab" + "bed".
BPE_SCORES_PRIORITY_1: {
text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_1,
tokens: ["grab", "bed"],
ids: [2, 59031, 2907],
decoded: "<bos>grabbed",
},
// BPE_SCORES_PRIORITY_2: input taken from LLAMA_TEST_STRINGS. The whole word,
// including its "\u2581" prefix, is the single token "\u2581grabbed"
// (id 41939); the prefix appears as one leading space in `decoded`.
BPE_SCORES_PRIORITY_2: {
text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_2,
tokens: ["\u2581grabbed"],
ids: [2, 41939],
decoded: "<bos> grabbed",
},
BPE_SCORES_PRIORITY_3: {
text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_3,
tokens: ["\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "grab", "bed"],
ids: [2, 148, 59031, 2907],
decoded: "<bos> grabbed",
},
// NEWLINE: input taken from LLAMA_TEST_STRINGS. The only token is "\n"
// (id 108).
NEWLINE: {
text: LLAMA_TEST_STRINGS.NEWLINE,
tokens: ["\n"],
ids: [2, 108],
decoded: "<bos>\n",
},
// NEWLINE_WITH_LEADING_SPACE: input taken from LLAMA_TEST_STRINGS. A bare
// "\u2581" token (id 235248) precedes "\n" (id 108) and appears as one space
// in `decoded`.
NEWLINE_WITH_LEADING_SPACE: {
text: LLAMA_TEST_STRINGS.NEWLINE_WITH_LEADING_SPACE,
tokens: ["\u2581", "\n"],
ids: [2, 235248, 108],
decoded: "<bos> \n",
},
// TABS: input taken from LLAMA_TEST_STRINGS. A single tab is one token
// (id 226) and the run of four tabs is also one token (id 255971); tabs are
// kept as "\t" in `decoded`.
TABS: {
text: LLAMA_TEST_STRINGS.TABS,
tokens: ["\t", "tabs", "\t\t\t\t", "out", "\u2581here"],
ids: [2, 226, 31973, 255971, 745, 1517],
decoded: "<bos>\ttabs\t\t\t\tout here",
},
// NEWLINE_AND_TAB: input taken from LLAMA_TEST_STRINGS. Newline, tab and
// newline are three separate tokens (ids 108, 226, 108).
NEWLINE_AND_TAB: {
text: LLAMA_TEST_STRINGS.NEWLINE_AND_TAB,
tokens: ["\n", "\t", "\n"],
ids: [2, 108, 226, 108],
decoded: "<bos>\n\t\n",
},
// CHINESE_LETTER: input taken from LLAMA_TEST_STRINGS. The single character
// "\u9547" is one token (id 237796).
CHINESE_LETTER: {
text: LLAMA_TEST_STRINGS.CHINESE_LETTER,
tokens: ["\u9547"],
ids: [2, 237796],
decoded: "<bos>\u9547",
},
// EMOJIS_1: input taken from LLAMA_TEST_STRINGS. The emoji (written as the
// surrogate pair "\ud83e\udd99") is one token (id 250645).
EMOJIS_1: {
text: LLAMA_TEST_STRINGS.EMOJIS_1,
tokens: ["\ud83e\udd99"],
ids: [2, 250645],
decoded: "<bos>\ud83e\udd99",
},
// EMOJIS_2: input taken from LLAMA_TEST_STRINGS. The emoji is one token
// (id 250645); the character after it is emitted as three byte tokens,
// "<0xEA>", "<0x99>", "<0x8A>" (ids 451, 370, 355), which together decode to
// "\ua64a".
EMOJIS_2: {
text: LLAMA_TEST_STRINGS.EMOJIS_2,
tokens: ["\ud83e\udd99", "<0xEA>", "<0x99>", "<0x8A>"],
ids: [2, 250645, 451, 370, 355],
decoded: "<bos>\ud83e\udd99\ua64a",
},
// EMOJIS_3: input taken from LLAMA_TEST_STRINGS. The three byte tokens
// "<0xEA>", "<0x99>", "<0x8A>" (ids 451, 370, 355) come first and decode to
// "\ua64a"; the emoji follows as one token (id 250645).
EMOJIS_3: {
text: LLAMA_TEST_STRINGS.EMOJIS_3,
tokens: ["<0xEA>", "<0x99>", "<0x8A>", "\ud83e\udd99"],
ids: [2, 451, 370, 355, 250645],
decoded: "<bos>\ua64a\ud83e\udd99",
},
// PARAGRAPH: input taken from LLAMA_TEST_STRINGS. A long multi-sentence
// passage that combines the patterns seen in the shorter cases: per-digit
// number tokens, bare "\u2581" tokens (id 235248), an emoji token (id 250645)
// and the byte tokens "<0xEA>", "<0x99>", "<0x8A>" (ids 451, 370, 355).
// `decoded` is single-quoted because the passage contains double quotes.
PARAGRAPH: {
text: LLAMA_TEST_STRINGS.PARAGRAPH,
tokens: ["The", "\u2581llama", "\u2581(/", "\u02c8", "l", "\u0251", "\u02d0", "m\u0259", "/;", "\u2581", "\ud83e\udd99", "Spanish", "\u2581pronunciation", ":", "\u2581[", "\u02c8", "\u028e", "ama", "])", "\u2581(", "Lama", "\u2581g", "lama", ")", "\u2581is", "\u2581a", "\u2581domesticated", "\u2581South", "\u2581American", "\u2581came", "lid", ",", "\u2581widely", "\u2581used", "\u2581as", "\u2581a", "\u2581meat", "\u2581and", "\u2581pack", "\u2581animal", "\u2581by", "\u2581Andean", "\u2581cultures", "\u2581since", "\u2581the", "\u2581Pre", "-", "Columb", "ian", "\u2581era", ".", "\u2581Lla", "mas", "\u2581are", "\u2581social", "\u2581animals", "\u2581and", "\u2581live", "\u2581with", "\u2581others", "\u2581as", "\u2581a", "\u2581herd", ".", "\u2581Their", "\u2581wool", "\u2581is", "\u2581soft", "\u2581and", "\u2581contains", "\u2581only", "\u2581a", "\u2581small", "\u2581amount", "\u2581of", "\u2581lan", "olin", ".[", "2", "]", "\u2581Lla", "mas", "\u2581can", "\u2581learn", "\u2581simple", "\u2581tasks", "\u2581after", "\u2581a", "\u2581few", "\u2581repetitions", ".", "\u2581When", "\u2581using", "\u2581a", "\u2581pack", ",", "\u2581they", "\u2581can", "\u2581carry", "\u2581about", "\u2581", "2", "5", "\u2581to", "\u2581", "3", "0", "%", "\u2581of", "\u2581their", "\u2581body", "\u2581weight", "\u2581for", "\u2581", "8", "\u2581to", "\u2581", "1", "3", "\u2581km", "\u2581(", "5", "\u2013", "8", "\u2581miles", ").[", "3", "]", "\u2581The", "\u2581name", "\u2581llama", "\u2581(", "in", "\u2581the", "\u2581past", "\u2581also", "\u2581spelled", '\u2581"', "lama", '"', "\u2581or", '\u2581"', "g", "lama", '")', "\u2581was", "\u2581adopted", "\u2581by", "\u2581European", "\u2581settlers", "\u2581from", "\u2581native", "\u2581Peru", "vi", "ans", ".[", "4", "]", "\u2581The", "\u2581ancestors", "\u2581of", "\u2581llamas", "\u2581are", "\u2581thought", "\u2581to", "\u2581have", "\u2581originated", "\u2581from", "\u2581the", "\u2581Great", "\u2581Plains", "\u2581of", 
"\u2581North", "\u2581America", "\u2581about", "\u2581", "4", "0", "\u2581million", "\u2581years", "\u2581ago", ",", "\u2581and", "\u2581subsequently", "\u2581migrated", "\u2581to", "\u2581South", "\u2581America", "\u2581about", "\u2581three", "\u2581million", "\u2581years", "\u2581ago", "\u2581during", "\u2581the", "\u2581Great", "\u2581American", "\u2581Interchange", ".", "\u2581By", "\u2581the", "\u2581end", "\u2581of", "\u2581the", "\u2581last", "\u2581ice", "\u2581age", "\u2581(", "1", "0", ",", "0", "0", "0", "\u2013", "1", "2", ",", "0", "0", "0", "\u2581years", "\u2581ago", "),", "\u2581came", "lids", "\u2581were", "\u2581extinct", "\u2581in", "\u2581North", "\u2581America", ".[", "3", "]", "\u2581As", "\u2581of", "\u2581", "2", "0", "0", "7", ",", "\u2581there", "\u2581were", "\u2581over", "\u2581seven", "\u2581million", "\u2581llamas", "\u2581and", "\u2581al", "pac", "as", "\u2581in", "\u2581South", "\u2581America", "\u2581and", "\u2581over", "\u2581", "1", "5", "8", ",", "0", "0", "0", "\u2581llamas", "\u2581and", "\u2581", "1", "0", "0", ",", "0", "0", "0", "<0xEA>", "<0x99>", "<0x8A>", "\ud83e\udd99", "\u2581al", "pac", "as", ",", "\u2581descended", "\u2581from", "\u2581progen", "itors", "\u2581imported", "\u2581late", "\u2581in", "\u2581the", "\u2581", "2", "0", "th", "\u2581century", ",", "\u2581in", "\u2581the", "\u2581United", "\u2581States", "\u2581and", "\u2581Canada", ".[", "5", "]", "\u2581In", "\u2581A", "ym", "ara", "\u2581mythology", ",", "\u2581llamas", "\u2581are", "\u2581important", "\u2581beings", ".", "\u2581The", "\u2581Heavenly", "\u2581Llama", "\u2581is", "\u2581said", "\u2581to", "\u2581drink", "\u2581water", "\u2581from", "\u2581the", "\u2581ocean", "\u2581and", "\u2581urin", "ates", "\u2581as", "\u2581it", "\u2581rains", ".[", "6", "]", "\u2581According", "\u2581to", "\u2581A", "ym", "ara", "\u2581es", "ch", "atology", ",", "\u2581llamas", "\u2581will", "\u2581return", "\u2581to", "\u2581the", "\u2581water", "\u2581springs", 
"\u2581and", "\u2581lagoons", "\u2581where", "\u2581they", "\u2581come", "\u2581from", "\u2581at", "\u2581the", "\u2581end", "\u2581of", "\u2581time", ".[", "6", "]"],
ids: [2, 651, 19001, 101949, 239229, 235257, 240527, 240342, 128631, 102430, 235248, 250645, 51590, 74569, 235292, 892, 239229, 246752, 2867, 3013, 591, 221520, 583, 10450, 235275, 603, 476, 183304, 4316, 3725, 3392, 3353, 235269, 16714, 1671, 685, 476, 11827, 578, 3386, 8205, 731, 207552, 24541, 2754, 573, 2769, 235290, 222963, 1282, 6063, 235265, 172809, 2616, 708, 3127, 8398, 578, 3685, 675, 3588, 685, 476, 48010, 235265, 10368, 23834, 603, 4072, 578, 7744, 1297, 476, 2301, 3619, 576, 7607, 28424, 19047, 235284, 235307, 172809, 2616, 798, 3918, 3890, 13333, 1452, 476, 2619, 126286, 235265, 3194, 2177, 476, 3386, 235269, 984, 798, 6383, 1105, 235248, 235284, 235308, 577, 235248, 235304, 235276, 235358, 576, 1024, 2971, 5171, 604, 235248, 235321, 577, 235248, 235274, 235304, 5821, 591, 235308, 235389, 235321, 7112, 232524, 235304, 235307, 714, 1503, 19001, 591, 473, 573, 3433, 1170, 73003, 664, 10450, 235281, 689, 664, 235264, 10450, 1388, 729, 13861, 731, 7737, 57710, 774, 11634, 30160, 893, 779, 19047, 235310, 235307, 714, 44106, 576, 129953, 708, 3421, 577, 791, 52102, 774, 573, 6553, 55118, 576, 4612, 5783, 1105, 235248, 235310, 235276, 4416, 1658, 3958, 235269, 578, 27956, 106398, 577, 4316, 5783, 1105, 2149, 4416, 1658, 3958, 2290, 573, 6553, 3725, 193879, 235265, 3339, 573, 1580, 576, 573, 2001, 8357, 3911, 591, 235274, 235276, 235269, 235276, 235276, 235276, 235389, 235274, 235284, 235269, 235276, 235276, 235276, 1658, 3958, 823, 3392, 41253, 1049, 78561, 575, 4612, 5783, 19047, 235304, 235307, 1877, 576, 235248, 235284, 235276, 235276, 235324, 235269, 1104, 1049, 1163, 6861, 4416, 129953, 578, 717, 23337, 508, 575, 4316, 5783, 578, 1163, 235248, 235274, 235308, 235321, 235269, 235276, 235276, 235276, 129953, 578, 235248, 235274, 235276, 235276, 235269, 235276, 235276, 235276, 451, 370, 355, 250645, 717, 23337, 508, 235269, 64700, 774, 66279, 15517, 29271, 5245, 575, 573, 235248, 235284, 235276, 489, 7861, 235269, 575, 573, 3520, 3858, 578, 6591, 19047, 
235308, 235307, 878, 586, 3985, 1610, 76701, 235269, 129953, 708, 2845, 27290, 235265, 714, 89830, 170669, 603, 1180, 577, 7182, 2003, 774, 573, 13940, 578, 111204, 1204, 685, 665, 50852, 19047, 235318, 235307, 11926, 577, 586, 3985, 1610, 875, 530, 92764, 235269, 129953, 877, 2203, 577, 573, 2003, 31104, 578, 221493, 1570, 984, 2063, 774, 696, 573, 1580, 576, 1069, 19047, 235318, 235307],
decoded: '<bos>The llama (/\u02c8l\u0251\u02d0m\u0259/; \ud83e\udd99Spanish pronunciation: [\u02c8\u028eama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5\u20138 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000\u201312,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000\ua64a\ud83e\udd99 alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
},
},
};