daslab-testing
/

CloverLM

Text Generation

low-precision-training

Model card Files Files and versions

mansaripo committed on Mar 20

Commit

317675d

·

verified ·

1 Parent(s): afd6f57

Upload folder using huggingface_hub

Files changed (2) hide show

config.json +5 -5
modeling_cloverlm.py +1 -1

config.json CHANGED Viewed

@@ -12,6 +12,7 @@
     ]
   },
   "d_head": 128,
   "heads": 28,
   "hidden_size": 3584,
   "intermediate_size": 14336,
@@ -22,15 +23,14 @@
   "num_blocks": 29,
   "num_hidden_layers": 29,
   "num_key_value_heads": 7,
   "quartet_2_impl": "pseudoquant",
   "ratio": 4,
   "scale_type": "1/sqrt(d)",
-  "head_dim": 128,
   "tie_word_embeddings": true,
   "transformers_version": "5.3.0",
   "vocab_size": 32000,
-  "weight_tying": true,
-  "quantization_config": {
-    "quant_method": "quartet2"
-  }
 }

     ]
   },
   "d_head": 128,
+  "head_dim": 128,
   "heads": 28,
   "hidden_size": 3584,
   "intermediate_size": 14336,
   "num_blocks": 29,
   "num_hidden_layers": 29,
   "num_key_value_heads": 7,
+  "quantization_config": {
+    "quant_method": "quartet2"
+  },
   "quartet_2_impl": "pseudoquant",
   "ratio": 4,
   "scale_type": "1/sqrt(d)",
   "tie_word_embeddings": true,
   "transformers_version": "5.3.0",
   "vocab_size": 32000,
+  "weight_tying": true
 }

modeling_cloverlm.py CHANGED Viewed

@@ -246,4 +246,4 @@ class CloverLMForCausalLM(PreTrainedModel, GenerationMixin):
         return {"input_ids": input_ids}
     def _supports_default_dynamic_cache(self):
-        return False

         return {"input_ids": input_ids}
     def _supports_default_dynamic_cache(self):
+        return False