Merge branch 'main' of https://huggingface.co/OPEA/MiniMax-Text-01-int4-sym-inc-preview into main
Browse files
README.md
CHANGED
|
@@ -30,7 +30,7 @@ from auto_round import AutoRoundConfig ##must import for autoround format
|
|
| 30 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 31 |
import torch
|
| 32 |
|
| 33 |
-
quantized_model_dir = "/
|
| 34 |
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
|
| 36 |
model = AutoModelForCausalLM.from_pretrained(quantized_model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16,
|
|
@@ -41,13 +41,13 @@ def forward_hook(module, input, output):
|
|
| 41 |
return torch.clamp(output, -65504, 65504).to(torch.bfloat16)
|
| 42 |
|
| 43 |
|
| 44 |
-
def
|
| 45 |
for name, module in model.named_modules():
|
| 46 |
if "QuantLinear" in module.__class__.__name__ or isinstance(module, torch.nn.Linear):
|
| 47 |
module.register_forward_hook(forward_hook)
|
| 48 |
|
| 49 |
|
| 50 |
-
|
| 51 |
tokenizer.pad_token = tokenizer.eos_token
|
| 52 |
|
| 53 |
prompts = [
|
|
@@ -153,7 +153,7 @@ pip3 install git+https://github.com/intel/auto-round.git@bf16_inference
|
|
| 153 |
|
| 154 |
```python
|
| 155 |
import torch
|
| 156 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 157 |
|
| 158 |
model_name = "MiniMaxAI/MiniMax-Text-01"
|
| 159 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
| 30 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 31 |
import torch
|
| 32 |
|
| 33 |
+
quantized_model_dir = "OPEA/MiniMax-Text-01-int4-sym-inc-preview"
|
| 34 |
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
|
| 36 |
model = AutoModelForCausalLM.from_pretrained(quantized_model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16,
|
|
|
|
| 41 |
return torch.clamp(output, -65504, 65504).to(torch.bfloat16)
|
| 42 |
|
| 43 |
|
| 44 |
+
def register_fp16_hooks(model):
|
| 45 |
for name, module in model.named_modules():
|
| 46 |
if "QuantLinear" in module.__class__.__name__ or isinstance(module, torch.nn.Linear):
|
| 47 |
module.register_forward_hook(forward_hook)
|
| 48 |
|
| 49 |
|
| 50 |
+
register_fp16_hooks(model)
|
| 51 |
tokenizer.pad_token = tokenizer.eos_token
|
| 52 |
|
| 53 |
prompts = [
|
|
|
|
| 153 |
|
| 154 |
```python
|
| 155 |
import torch
|
| 156 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 157 |
|
| 158 |
model_name = "MiniMaxAI/MiniMax-Text-01"
|
| 159 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|