Update README.md
#2
by milistu - opened
README.md
CHANGED
|
@@ -108,12 +108,15 @@ pip3 install git+https://github.com/casper-hansen/AutoAWQ.git@1c5ccc791fa2cb0697
|
|
| 108 |
from awq import AutoAWQForCausalLM
|
| 109 |
from transformers import AutoTokenizer
|
| 110 |
|
|
|
|
|
|
|
|
|
|
| 111 |
model_name_or_path = "TheBloke/Mistral-7B-v0.1-AWQ"
|
| 112 |
|
| 113 |
# Load model
|
| 114 |
model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
|
| 115 |
-
trust_remote_code=False, safetensors=True)
|
| 116 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)
|
| 117 |
|
| 118 |
prompt = "Tell me about AI"
|
| 119 |
prompt_template=f'''{prompt}
|
|
@@ -154,7 +157,8 @@ pipe = pipeline(
|
|
| 154 |
temperature=0.7,
|
| 155 |
top_p=0.95,
|
| 156 |
top_k=40,
|
| 157 |
-
repetition_penalty=1.1
|
|
|
|
| 158 |
)
|
| 159 |
|
| 160 |
print(pipe(prompt_template)[0]['generated_text'])
|
|
|
|
| 108 |
from awq import AutoAWQForCausalLM
|
| 109 |
from transformers import AutoTokenizer
|
| 110 |
|
| 111 |
+
import torch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
| 112 |
+
|
| 113 |
+
|
| 114 |
model_name_or_path = "TheBloke/Mistral-7B-v0.1-AWQ"
|
| 115 |
|
| 116 |
# Load model
|
| 117 |
model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
|
| 118 |
+
trust_remote_code=False, safetensors=True, device_map="auto")
|
| 119 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)
|
| 120 |
|
| 121 |
prompt = "Tell me about AI"
|
| 122 |
prompt_template=f'''{prompt}
|
|
|
|
| 157 |
temperature=0.7,
|
| 158 |
top_p=0.95,
|
| 159 |
top_k=40,
|
| 160 |
+
repetition_penalty=1.1,
|
| 161 |
+
device=device,
|
| 162 |
)
|
| 163 |
|
| 164 |
print(pipe(prompt_template)[0]['generated_text'])
|