Upload scripts/bundle_litertlm.py with huggingface_hub
Browse files
- scripts/bundle_litertlm.py +23 -100
scripts/bundle_litertlm.py
CHANGED
|
@@ -33,106 +33,29 @@ from litert_lm.runtime.proto import (
|
|
| 33 |
)
|
| 34 |
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
#
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
"lb": "Luxembourgish", "lg": "Ganda", "ln": "Lingala", "lo": "Lao",
|
| 60 |
-
"lt": "Lithuanian", "lu": "Luba-Katanga", "lv": "Latvian", "mg": "Malagasy",
|
| 61 |
-
"mi": "Maori", "mk": "Macedonian", "ml": "Malayalam", "mn": "Mongolian",
|
| 62 |
-
"mr": "Marathi", "ms": "Malay", "mt": "Maltese", "my": "Burmese",
|
| 63 |
-
"nb": "Norwegian Bokmål", "nd": "North Ndebele", "ne": "Nepali",
|
| 64 |
-
"nl": "Dutch", "nn": "Norwegian Nynorsk", "no": "Norwegian",
|
| 65 |
-
"nr": "South Ndebele", "nv": "Navajo", "ny": "Chichewa", "oc": "Occitan",
|
| 66 |
-
"om": "Oromo", "or": "Oriya", "os": "Ossetian", "pa": "Punjabi",
|
| 67 |
-
"pl": "Polish", "ps": "Pashto", "pt": "Portuguese", "qu": "Quechua",
|
| 68 |
-
"rm": "Romansh", "rn": "Rundi", "ro": "Romanian", "ru": "Russian",
|
| 69 |
-
"rw": "Kinyarwanda", "sa": "Sanskrit", "sc": "Sardinian", "sd": "Sindhi",
|
| 70 |
-
"se": "Northern Sami", "sg": "Sango", "si": "Sinhala", "sk": "Slovak",
|
| 71 |
-
"sl": "Slovenian", "sn": "Shona", "so": "Somali", "sq": "Albanian",
|
| 72 |
-
"sr": "Serbian", "ss": "Swati", "st": "Southern Sotho", "su": "Sundanese",
|
| 73 |
-
"sv": "Swedish", "sw": "Swahili", "ta": "Tamil", "te": "Telugu",
|
| 74 |
-
"tg": "Tajik", "th": "Thai", "ti": "Tigrinya", "tk": "Turkmen",
|
| 75 |
-
"tl": "Tagalog", "tn": "Tswana", "to": "Tonga", "tr": "Turkish",
|
| 76 |
-
"ts": "Tsonga", "tt": "Tatar", "ug": "Uyghur", "uk": "Ukrainian",
|
| 77 |
-
"ur": "Urdu", "uz": "Uzbek", "ve": "Venda", "vi": "Vietnamese",
|
| 78 |
-
"vo": "Volapük", "wa": "Walloon", "wo": "Wolof", "xh": "Xhosa",
|
| 79 |
-
"yi": "Yiddish", "yo": "Yoruba", "za": "Zhuang", "zh": "Chinese",
|
| 80 |
-
"zu": "Zulu"
|
| 81 |
-
} -%}
|
| 82 |
-
{{ bos_token }}
|
| 83 |
-
{%- if (messages[0]['role'] != 'user') -%}
|
| 84 |
-
{{ raise_exception("Conversations must start with a user prompt.") }}
|
| 85 |
-
{%- endif -%}
|
| 86 |
-
{%- for message in messages -%}
|
| 87 |
-
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
|
| 88 |
-
{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
|
| 89 |
-
{%- endif -%}
|
| 90 |
-
{%- if (message['role'] == 'assistant') -%}
|
| 91 |
-
{%- if message['content'] is none or message['content'] is not string -%}
|
| 92 |
-
{{ raise_exception("Assistant role must provide content as a string") }}
|
| 93 |
-
{%- endif -%}
|
| 94 |
-
{{ '<start_of_turn>model\\n'}}
|
| 95 |
-
{{ message["content"] | trim }}
|
| 96 |
-
{%- elif (message['role'] == 'user') -%}
|
| 97 |
-
{%- set content = message["content"] if message["content"] is string else message["content"][0] -%}
|
| 98 |
-
{%- if content is string -%}
|
| 99 |
-
{%- set source_lang = "English" -%}
|
| 100 |
-
{%- set source_lang_code = "en" -%}
|
| 101 |
-
{%- set target_lang = "Spanish" -%}
|
| 102 |
-
{%- set target_lang_code = "es" -%}
|
| 103 |
-
{{ '<start_of_turn>user\\nYou are a professional ' + source_lang + ' (' + source_lang_code + ') to ' +
|
| 104 |
-
target_lang + ' (' + target_lang_code + ') translator. Your goal is to accurately convey the meaning ' +
|
| 105 |
-
'and nuances of the original ' + source_lang + ' text while adhering to ' + target_lang + ' grammar, ' +
|
| 106 |
-
'vocabulary, and cultural sensitivities.\\n' +
|
| 107 |
-
'Produce only the ' + target_lang + ' translation, without any additional explanations or ' +
|
| 108 |
-
'commentary. Please translate the following ' + source_lang + ' text into ' + target_lang + ':\\n\\n\\n' +
|
| 109 |
-
content | trim
|
| 110 |
-
}}
|
| 111 |
-
{%- else -%}
|
| 112 |
-
{%- set source_lang_code = content["source_lang_code"] | replace("_", "-") -%}
|
| 113 |
-
{%- set source_lang = languages.get(source_lang_code, source_lang_code) -%}
|
| 114 |
-
{%- set target_lang_code = content["target_lang_code"] | replace("_", "-") -%}
|
| 115 |
-
{%- set target_lang = languages.get(target_lang_code, target_lang_code) -%}
|
| 116 |
-
{{ '<start_of_turn>user\\nYou are a professional ' + source_lang + ' (' + source_lang_code + ') to ' +
|
| 117 |
-
target_lang + ' (' + target_lang_code + ') translator. Your goal is to accurately convey the meaning ' +
|
| 118 |
-
'and nuances of the original ' + source_lang + ' text while adhering to ' + target_lang + ' grammar, ' +
|
| 119 |
-
'vocabulary, and cultural sensitivities.\\n'
|
| 120 |
-
}}
|
| 121 |
-
{%- if content["type"] == 'text' -%}
|
| 122 |
-
{{ 'Produce only the ' + target_lang + ' translation, without any additional explanations or ' +
|
| 123 |
-
'commentary. Please translate the following ' + source_lang + ' text into ' + target_lang + ':\\n\\n\\n' +
|
| 124 |
-
content["text"] | trim
|
| 125 |
-
}}
|
| 126 |
-
{%- endif -%}
|
| 127 |
-
{%- endif -%}
|
| 128 |
-
{%- else -%}
|
| 129 |
-
{{ raise_exception("Conversations must only contain user or assistant roles.") }}
|
| 130 |
-
{%- endif -%}
|
| 131 |
-
{{ '<end_of_turn>\\n' }}
|
| 132 |
-
{%- endfor -%}
|
| 133 |
-
{%- if add_generation_prompt -%}
|
| 134 |
-
{{'<start_of_turn>model\\n'}}
|
| 135 |
-
{%- endif -%}"""
|
| 136 |
|
| 137 |
|
| 138 |
def build_llm_metadata_proto(max_tokens: int) -> bytes:
|
|
|
|
| 33 |
)
|
| 34 |
|
| 35 |
|
| 36 |
+
# Simple Jinja template compatible with LiteRT-LM runtime (no .get(), no complex tests).
|
| 37 |
+
# Handles plain text input from Google AI Edge Gallery.
|
| 38 |
+
# Emits a generic "professional translator" instruction; the template itself names no source or target language.
|
| 39 |
+
# Users should therefore state the language pair at the start of their message,
|
| 40 |
+
# e.g. "Translate English to French:\n\nHello"
|
| 41 |
+
TRANSLATE_GEMMA_JINJA_TEMPLATE = \
|
| 42 |
+
"{{ bos_token }}" \
|
| 43 |
+
"{% for message in messages %}" \
|
| 44 |
+
"{% if message['role'] == 'user' %}" \
|
| 45 |
+
"<start_of_turn>user\n" \
|
| 46 |
+
"You are a professional translator. " \
|
| 47 |
+
"Produce only the translation of the following text, without any additional explanations or commentary:\n\n\n" \
|
| 48 |
+
"{{ message['content'] | trim }}" \
|
| 49 |
+
"<end_of_turn>\n" \
|
| 50 |
+
"{% elif message['role'] == 'assistant' %}" \
|
| 51 |
+
"<start_of_turn>model\n" \
|
| 52 |
+
"{{ message['content'] | trim }}" \
|
| 53 |
+
"<end_of_turn>\n" \
|
| 54 |
+
"{% endif %}" \
|
| 55 |
+
"{% endfor %}" \
|
| 56 |
+
"{% if add_generation_prompt %}" \
|
| 57 |
+
"<start_of_turn>model\n" \
|
| 58 |
+
"{% endif %}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
def build_llm_metadata_proto(max_tokens: int) -> bytes:
|