running model issue
I believe I have installed all the required packages, but running this script:
"
# Interactive command-line chat loop: loads the causal LM and tokenizer found
# at `model_path`, wraps them in a text-generation pipeline, and answers each
# line typed by the user until "exit" or "quit" is entered.
import torch
from transformers.utils.quantization_config import HiggsConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline, utils

model_path = ".../Llama-3.3-70B-Instruct-HIGGS-4bit"

# NOTE(review): the directory name suggests the checkpoint is already
# HIGGS-quantized; confirm whether passing a fresh HiggsConfig here is
# intended for such a checkpoint -- TODO confirm against the HIGGS docs.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=HiggsConfig(bits=4),
    device_map="auto",
)
model = torch.compile(model)
tokenizer = AutoTokenizer.from_pretrained(model_path)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

print("Chat with your local model! Type 'exit' to quit.")
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        break
    # You may customize prompt formatting depending on the model's instruction tuning
    prompt = f"user\n{user_input}\nmodel\n"
    output = generator(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    # The pipeline output starts with the prompt itself; slice it off so only
    # the newly generated continuation is printed.
    print("LLM:", output[0]["generated_text"][len(prompt):].strip())
"
gives me this error:
"
This IS expected if you are initializing LlamaForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
This IS NOT expected if you are initializing LlamaForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Traceback (most recent call last):
File "/home/user/src/envTorch/run_my.py", line 20, in <module>
model = AutoModelForCausalLM.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/src/envTorch/transformers/src/transformers/models/auto/auto_factory.py", line 571, in from_pretrained
return model_class.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/src/envTorch/transformers/src/transformers/modeling_utils.py", line 280, in _wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/src/envTorch/transformers/src/transformers/modeling_utils.py", line 4595, in from_pretrained
hf_quantizer.postprocess_model(model, config=config)
File "/home/user/src/envTorch/transformers/src/transformers/quantizers/base.py", line 238, in postprocess_model
return self._process_model_after_weight_loading(model, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/src/envTorch/transformers/src/transformers/quantizers/quantizer_higgs.py", line 153, in _process_model_after_weight_loading
module.tune_metadata = TuneMetaData.from_dict(self.quantization_config.tune_metadata[name])
KeyError: 'model.layers.0.self_attn.q_proj'
"
I tried to work around it with various "tune_metadata" settings, but had no luck.
Could you please take a look at what is wrong in my script?