Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

.gitattributes +13 -0
README.md +2 -27
added_tokens.json +3 -0
chat_template.json +3 -0
config.json +80 -2
preprocessor_config.json +29 -0
processor_config.json +4 -0
special_tokens_map.json +33 -0
tokenizer.model +3 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00005-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00006-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00007-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00008-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00009-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00003-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00010-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00011-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00012-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text
+tokenizer.model filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,32 +1,7 @@
 ---
 language: en
-library_name: mlx
-pipeline_tag: text-generation
 tags:
 - mlx
 ---
-An experimental ablation of Gemma-3-27B-it, using the [Heretic](https://github.com/p-e-w/heretic) tool.
-Compared to the standard configuration of Heretic, there are a few changes:
-1. The training and test datasets used were extended compared to the default subset used by Heretic
-2. A version of [Magnitude-Preserving Orthogonal Ablation](https://huggingface.co/blog/grimjim/norm-preserving-biprojected-abliteration) (MPOA) is used
-3. To stay faithful to MPOA, the harmful direction to ablate is chosen from between 2 layers (Heretic's "global" direction scope)
-4. To stay faithful to MPOA, a 99% winsorization is applied to the residuals
-5. Some additional refusal markers were added to avoid bypassing the refusal detection with bad punctuation
-To achieve strong results:
-1. Parameter ranges were iteratively refined by looking at resulting refusal and divergence scores
-2. The scoring function was adjusted to prioritize low-refusal results
-The model name contains the properties of the ablation:
-1. `MPOA` for the usage of Magnitude-Preserving Orthogonal Ablation
-2. `G` for the usage of global direction scope
-3. `W` for the usage of winsorization
-4. `D` for the measured KL divergence
-5. `R` for the number of refusals
-Original: https://huggingface.co/spikymoth/G3-Heresy-MPOA-G-W99-D0.0690-R02
-GGUF (standard): https://huggingface.co/spikymoth/G3-Heresy-MPOA-G-W99-D0.0690-R02-GGUF
-GGUF (imatrix): https://huggingface.co/spikymoth/G3-Heresy-MPOA-G-W99-D0.0690-R02-i1-GGUF
-MLX: https://huggingface.co/spikymoth/G3-Heresy-MPOA-G-W99-D0.0690-R02-MLX

 ---
 language: en
 tags:
 - mlx
+pipeline_tag: text-generation
+library_name: mlx
 ---

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image_soft_token>": 262144
+}

chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n    {%- if messages[0]['content'] is string -%}\n        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n    {%- else -%}\n        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n    {%- endif -%}\n    {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n    {%- set first_user_prefix = \"\" -%}\n    {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n        {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n    {%- endif -%}\n    {%- if (message['role'] == 'assistant') -%}\n        {%- set role = \"model\" -%}\n    {%- else -%}\n        {%- set role = message['role'] -%}\n    {%- endif -%}\n    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n    {%- if message['content'] is string -%}\n        {{ message['content'] | trim }}\n    {%- elif message['content'] is iterable -%}\n        {%- for item in message['content'] -%}\n            {%- if item['type'] == 'image' -%}\n                {{ '<start_of_image>' }}\n            {%- elif item['type'] == 'text' -%}\n                {{ item['text'] | trim }}\n            {%- endif -%}\n        {%- endfor -%}\n    {%- else -%}\n        {{ raise_exception(\"Invalid content type\") }}\n    {%- endif -%}\n    {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{'<start_of_turn>model\n'}}\n{%- endif -%}\n"
+}

config.json CHANGED Viewed

@@ -3,6 +3,7 @@
         "Gemma3ForConditionalGeneration"
     ],
     "boi_token_index": 255999,
     "eoi_token_index": 256000,
     "eos_token_id": [
         1,
@@ -23,21 +24,98 @@
         "mode": "affine"
     },
     "text_config": {
         "head_dim": 128,
         "hidden_size": 5376,
         "intermediate_size": 21504,
         "model_type": "gemma3_text",
         "num_attention_heads": 32,
         "num_hidden_layers": 62,
         "num_key_value_heads": 16,
         "query_pre_attn_scalar": 168,
         "rope_scaling": {
             "factor": 8.0,
             "rope_type": "linear"
         },
         "sliding_window": 1024,
         "vocab_size": 262208
     },
-    "torch_dtype": "bfloat16",
-    "transformers_version": "4.50.0.dev0"
 }

         "Gemma3ForConditionalGeneration"
     ],
     "boi_token_index": 255999,
+    "dtype": "bfloat16",
     "eoi_token_index": 256000,
     "eos_token_id": [
         1,
         "mode": "affine"
     },
     "text_config": {
+        "_sliding_window_pattern": 6,
+        "attention_bias": false,
+        "attention_dropout": 0.0,
+        "attn_logit_softcapping": null,
+        "dtype": "bfloat16",
+        "final_logit_softcapping": null,
         "head_dim": 128,
+        "hidden_activation": "gelu_pytorch_tanh",
         "hidden_size": 5376,
+        "initializer_range": 0.02,
         "intermediate_size": 21504,
+        "layer_types": [
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention"
+        ],
+        "max_position_embeddings": 131072,
         "model_type": "gemma3_text",
         "num_attention_heads": 32,
         "num_hidden_layers": 62,
         "num_key_value_heads": 16,
         "query_pre_attn_scalar": 168,
+        "rms_norm_eps": 1e-06,
+        "rope_local_base_freq": 10000.0,
         "rope_scaling": {
             "factor": 8.0,
             "rope_type": "linear"
         },
+        "rope_theta": 1000000.0,
         "sliding_window": 1024,
+        "use_bidirectional_attention": false,
+        "use_cache": true,
         "vocab_size": 262208
     },
+    "transformers_version": "4.57.1"
 }

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_pan_and_scan": null,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "Gemma3ImageProcessor",
+  "image_seq_length": 256,
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "pan_and_scan_max_num_crops": null,
+  "pan_and_scan_min_crop_size": null,
+  "pan_and_scan_min_ratio_to_activate": null,
+  "processor_class": "Gemma3Processor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 896,
+    "width": 896
+  }
+}

processor_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "image_seq_length": 256,
+  "processor_class": "Gemma3Processor"
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074