barakplasma
/

translategemma-4b-it-android-task-quantized

@@ -33,23 +33,33 @@ from litert_lm.runtime.proto import (
 )
-# Simple Jinja template compatible with LiteRT-LM runtime (no .get(), no complex tests).
-# Handles plain text input from Google AI Edge Gallery.
-# Uses the exact prompt format TranslateGemma was trained with (en→es default).
-# Users who need other language pairs should prefix their message with the pair,
-# e.g. "Translate English to French:\n\nHello"
-TRANSLATE_GEMMA_JINJA_TEMPLATE = \
 "{{ bos_token }}" \
 "{% for message in messages %}" \
 "{% if message['role'] == 'user' %}" \
 "<start_of_turn>user\n" \
-"You are a professional translator. " \
-"Produce only the translation of the following text, without any additional explanations or commentary:\n\n\n" \
-"{{ message['content'] | trim }}" \
 "<end_of_turn>\n" \
 "{% elif message['role'] == 'assistant' %}" \
 "<start_of_turn>model\n" \
-"{{ message['content'] | trim }}" \
 "<end_of_turn>\n" \
 "{% endif %}" \
 "{% endfor %}" \
@@ -57,26 +67,48 @@ TRANSLATE_GEMMA_JINJA_TEMPLATE = \
 "<start_of_turn>model\n" \
 "{% endif %}"
-def build_llm_metadata_proto(max_tokens: int) -> bytes:
-    meta = llm_metadata_pb2.LlmMetadata()
-    meta.max_num_tokens = max_tokens
-    # Model type: Gemma3 (text-only variant — no vision config needed for TranslateGemma text mode)
-    meta.llm_model_type.gemma3.CopyFrom(llm_model_type_pb2.Gemma3())
-    # Start token: BOS = token id 2
-    meta.start_token.token_ids.ids.append(2)
-    # Stop tokens: EOS (id=1) and end_of_turn (id=106)
-    eos = meta.stop_tokens.add()
-    eos.token_ids.ids.append(1)
-    eot = meta.stop_tokens.add()
-    eot.token_ids.ids.append(106)
-    # Embed the Jinja template
-    meta.jinja_prompt_template = TRANSLATE_GEMMA_JINJA_TEMPLATE
     return meta.SerializeToString()
@@ -84,7 +116,11 @@ def build_llm_metadata_proto(max_tokens: int) -> bytes:
 def main():
     ap = argparse.ArgumentParser(description="Bundle TFLite + tokenizer into .litertlm")
     ap.add_argument("--tflite", required=True)
-    ap.add_argument("--tokenizer", required=True, help="SentencePiece .model file")
     ap.add_argument("--output", required=True)
     ap.add_argument("--max-tokens", type=int, default=2048)
     ap.add_argument("--quant", default="int8", help="Quantization label for metadata")
@@ -104,7 +140,7 @@ def main():
     output_path.parent.mkdir(parents=True, exist_ok=True)
     # Write LlmMetadata to temp file
-    meta_bytes = build_llm_metadata_proto(args.max_tokens)
     with tempfile.NamedTemporaryFile(suffix=".pb", delete=False) as f:
         meta_file = Path(f.name)
         f.write(meta_bytes)
@@ -118,7 +154,8 @@ def main():
     DType = litertlm_builder.DType
     builder = litertlm_builder.LitertLmFileBuilder()
-    builder.add_system_metadata(Metadata(key="model_name", value=f"TranslateGemma-4B-IT-{args.quant}", dtype=DType.STRING))
     builder.add_system_metadata(Metadata(key="authors", value="google", dtype=DType.STRING))
     builder.add_system_metadata(Metadata(key="quantization", value=args.quant, dtype=DType.STRING))
@@ -126,7 +163,10 @@ def main():
         str(tflite_path),
         model_type=litertlm_builder.TfLiteModelType.PREFILL_DECODE,
     )
-    builder.add_sentencepiece_tokenizer(str(tokenizer_path))
     builder.add_llm_metadata(str(meta_file))
     with open(output_path, "wb") as f:

 )
+# Generic Jinja template for translating between arbitrary language pairs.
+# Supports a structured XML-like input format: <src>LANG</src><dst>LANG</dst><text>TEXT</text>
+# If any of the <src>, <dst> or <text> opening tags is missing, the trimmed message is
+# passed through as plain text, with no translation instruction added.
+# Uses only Jinja2 features supported by the LiteRT-LM runtime (no .get(), basic string ops).
+GENERIC_TRANSLATE_TEMPLATE = \
 "{{ bos_token }}" \
 "{% for message in messages %}" \
 "{% if message['role'] == 'user' %}" \
+"{% set content = message['content'] | trim %}" \
+"{% if '<src>' in content and '<dst>' in content and '<text>' in content %}" \
+"{% set src_part = content | split('<src>') | last | split('</src>') | first | trim %}" \
+"{% set dst_part = content | split('<dst>') | last | split('</dst>') | first | trim %}" \
+"{% set text_part = content | split('<text>') | last | split('</text>') | first | trim %}" \
+"<start_of_turn>user\n" \
+"Translate {{ src_part }} to {{ dst_part }}.\n" \
+"Produce only the translation, without explanations:\n\n\n" \
+"{{ text_part }}\n" \
+"<end_of_turn>\n" \
+"{% else %}" \
 "<start_of_turn>user\n" \
+"{{ content }}\n" \
 "<end_of_turn>\n" \
+"{% endif %}" \
 "{% elif message['role'] == 'assistant' %}" \
 "<start_of_turn>model\n" \
+"{{ message['content'] | trim }}\n" \
 "<end_of_turn>\n" \
 "{% endif %}" \
 "{% endfor %}" \
 "<start_of_turn>model\n" \
 "{% endif %}"
+TRANSLATE_GEMMA_JINJA_TEMPLATE = GENERIC_TRANSLATE_TEMPLATE
+# Qwen3 chat template (ChatML format).
+# Each user, assistant and system message is rendered as
+# "<|im_start|>ROLE\n" + trimmed content + "<|im_end|>\n"; a message with any
+# other role produces no output.
+# When add_generation_prompt is set, the prompt ends with an opened assistant
+# turn that already contains an empty <think>\n\n</think> block (the no-think
+# mode prefix).
+QWEN3_CHAT_TEMPLATE = \
+"{% for message in messages %}" \
+"{% if message['role'] == 'user' %}" \
+"<|im_start|>user\n{{ message['content'] | trim }}<|im_end|>\n" \
+"{% elif message['role'] == 'assistant' %}" \
+"<|im_start|>assistant\n{{ message['content'] | trim }}<|im_end|>\n" \
+"{% elif message['role'] == 'system' %}" \
+"<|im_start|>system\n{{ message['content'] | trim }}<|im_end|>\n" \
+"{% endif %}" \
+"{% endfor %}" \
+"{% if add_generation_prompt %}" \
+"<|im_start|>assistant\n<think>\n\n</think>\n" \
+"{% endif %}"
+def build_llm_metadata_proto(max_tokens: int, model_type: str = "gemma3") -> bytes:
+    """Build the LlmMetadata proto for the bundled model and serialize it.
+
+    Args:
+        max_tokens: Value stored in the proto's ``max_num_tokens`` field.
+        model_type: ``"gemma3"`` (default) or ``"qwen3"``. Selects the model
+            type message, the start token, the stop tokens and the Jinja
+            prompt template written into the metadata.
+
+    Returns:
+        The serialized ``LlmMetadata`` message.
+
+    Raises:
+        ValueError: If ``model_type`` is neither ``"gemma3"`` nor ``"qwen3"``.
+    """
+    # Fail loudly: an unrecognized value used to fall through to the Gemma3
+    # branch, so a typo silently produced metadata for the wrong model.
+    if model_type not in ("gemma3", "qwen3"):
+        raise ValueError(
+            f"Unsupported model_type {model_type!r}; expected 'gemma3' or 'qwen3'"
+        )
+    meta = llm_metadata_pb2.LlmMetadata()
+    meta.max_num_tokens = max_tokens
+    if model_type == "qwen3":
+        meta.llm_model_type.qwen3.CopyFrom(llm_model_type_pb2.Qwen3())
+        # Qwen3 BOS: <|endoftext|> = 151643
+        meta.start_token.token_ids.ids.append(151643)
+        # Stop tokens: <|im_end|> = 151645, <|endoftext|> = 151643
+        for tid in [151645, 151643]:
+            st = meta.stop_tokens.add()
+            st.token_ids.ids.append(tid)
+        meta.jinja_prompt_template = QWEN3_CHAT_TEMPLATE
+    else:
+        # Model type: Gemma3 (text-only variant — no vision config needed for TranslateGemma text mode)
+        meta.llm_model_type.gemma3.CopyFrom(llm_model_type_pb2.Gemma3())
+        # Start token: BOS = token id 2
+        meta.start_token.token_ids.ids.append(2)
+        # Stop tokens: EOS (id=1) and end_of_turn (id=106)
+        eos = meta.stop_tokens.add()
+        eos.token_ids.ids.append(1)
+        eot = meta.stop_tokens.add()
+        eot.token_ids.ids.append(106)
+        meta.jinja_prompt_template = TRANSLATE_GEMMA_JINJA_TEMPLATE
     return meta.SerializeToString()
 def main():
     ap = argparse.ArgumentParser(description="Bundle TFLite + tokenizer into .litertlm")
     ap.add_argument("--tflite", required=True)
+    ap.add_argument("--tokenizer", required=True, help="SentencePiece .model or HF tokenizer.json")
+    ap.add_argument("--tokenizer-type", default="sp", choices=["sp", "hf"],
+                    help="sp=SentencePiece (default), hf=HuggingFace tokenizer.json")
+    ap.add_argument("--model-type", default="gemma3", choices=["gemma3", "qwen3"],
+                    help="LlmMetadata model type (gemma3=TranslateGemma, qwen3=DictaLM/Qwen3)")
     ap.add_argument("--output", required=True)
     ap.add_argument("--max-tokens", type=int, default=2048)
     ap.add_argument("--quant", default="int8", help="Quantization label for metadata")
     output_path.parent.mkdir(parents=True, exist_ok=True)
     # Write LlmMetadata to temp file
+    meta_bytes = build_llm_metadata_proto(args.max_tokens, model_type=args.model_type)
     with tempfile.NamedTemporaryFile(suffix=".pb", delete=False) as f:
         meta_file = Path(f.name)
         f.write(meta_bytes)
     DType = litertlm_builder.DType
     builder = litertlm_builder.LitertLmFileBuilder()
+    model_label = "DictaLM-3.0-1.7B" if args.model_type == "qwen3" else "TranslateGemma-4B-IT"
+    builder.add_system_metadata(Metadata(key="model_name", value=f"{model_label}-{args.quant}", dtype=DType.STRING))
     builder.add_system_metadata(Metadata(key="authors", value="google", dtype=DType.STRING))
     builder.add_system_metadata(Metadata(key="quantization", value=args.quant, dtype=DType.STRING))
         str(tflite_path),
         model_type=litertlm_builder.TfLiteModelType.PREFILL_DECODE,
     )
+    if args.tokenizer_type == "hf":
+        builder.add_hf_tokenizer(str(tokenizer_path))
+    else:
+        builder.add_sentencepiece_tokenizer(str(tokenizer_path))
     builder.add_llm_metadata(str(meta_file))
     with open(output_path, "wb") as f: