Add files using upload-large-folder tool

Files changed (10) hide show

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -7,15 +7,15 @@ extra_gated_prompt: To access Gemma on Hugging Face, you’re required to review
   agree to Google’s usage license. To do this, please ensure you’re logged in to Hugging
   Face and click below. Requests are processed immediately.
 extra_gated_button_content: Acknowledge license
-base_model: google/translategemma-27b-it
 tags:
 - mlx
 ---
-# mlx-community/translategemma-27b-it-8bit
-This model [mlx-community/translategemma-27b-it-8bit](https://huggingface.co/mlx-community/translategemma-27b-it-8bit) was
-converted to MLX format from [google/translategemma-27b-it](https://huggingface.co/google/translategemma-27b-it)
 using mlx-lm version **0.29.1**.
 ## Use with mlx
@@ -27,7 +27,7 @@ pip install mlx-lm
 ```python
 from mlx_lm import load, generate
-model, tokenizer = load("mlx-community/translategemma-27b-it-8bit")
 prompt = "hello"

   agree to Google’s usage license. To do this, please ensure you’re logged in to Hugging
   Face and click below. Requests are processed immediately.
 extra_gated_button_content: Acknowledge license
 tags:
 - mlx
+base_model: google/translategemma-12b-it
 ---
+# mlx-community/translategemma-12b-it-8bit
+This model [mlx-community/translategemma-12b-it-8bit](https://huggingface.co/mlx-community/translategemma-12b-it-8bit) was
+converted to MLX format from [google/translategemma-12b-it](https://huggingface.co/google/translategemma-12b-it)
 using mlx-lm version **0.29.1**.
 ## Use with mlx
 ```python
 from mlx_lm import load, generate
+model, tokenizer = load("mlx-community/translategemma-12b-it-8bit")
 prompt = "hello"

config.json CHANGED Viewed

@@ -26,11 +26,11 @@
         "attn_logit_softcapping": null,
         "dtype": "bfloat16",
         "final_logit_softcapping": null,
-        "head_dim": 128,
         "hidden_activation": "gelu_pytorch_tanh",
-        "hidden_size": 5376,
         "initializer_range": 0.02,
-        "intermediate_size": 21504,
         "layer_types": [
             "sliding_attention",
             "sliding_attention",
@@ -79,28 +79,14 @@
             "sliding_attention",
             "sliding_attention",
             "sliding_attention",
-            "full_attention",
-            "sliding_attention",
-            "sliding_attention",
-            "sliding_attention",
-            "sliding_attention",
-            "sliding_attention",
-            "full_attention",
-            "sliding_attention",
-            "sliding_attention",
-            "sliding_attention",
-            "sliding_attention",
-            "sliding_attention",
-            "full_attention",
-            "sliding_attention",
-            "sliding_attention"
         ],
         "max_position_embeddings": 131072,
         "model_type": "gemma3_text",
-        "num_attention_heads": 32,
-        "num_hidden_layers": 62,
-        "num_key_value_heads": 16,
-        "query_pre_attn_scalar": 168,
         "rms_norm_eps": 1e-06,
         "rope_local_base_freq": 10000,
         "rope_parameters": {

         "attn_logit_softcapping": null,
         "dtype": "bfloat16",
         "final_logit_softcapping": null,
+        "head_dim": 256,
         "hidden_activation": "gelu_pytorch_tanh",
+        "hidden_size": 3840,
         "initializer_range": 0.02,
+        "intermediate_size": 15360,
         "layer_types": [
             "sliding_attention",
             "sliding_attention",
             "sliding_attention",
             "sliding_attention",
             "sliding_attention",
+            "full_attention"
         ],
         "max_position_embeddings": 131072,
         "model_type": "gemma3_text",
+        "num_attention_heads": 16,
+        "num_hidden_layers": 48,
+        "num_key_value_heads": 8,
+        "query_pre_attn_scalar": 256,
         "rms_norm_eps": 1e-06,
         "rope_local_base_freq": 10000,
         "rope_parameters": {

model-00001-of-00003.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d375791d8b385012539bae9407566686e14b0009b106bfaf9997bc85f4b9069
+size 5356989205

model-00002-of-00003.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:28d257759581335491b1a32ec426a08166e63f773e52ad7b82535d8ffdda157b
+size 5352694354

model-00003-of-00003.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:314fd90fe022d1e6a6fd556eb0542bfa66b82fea2437a62cbc4ab078ee7cde3c
+size 1792614154

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.json ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
+size 33384568

tokenizer.model ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff