from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline from langchain_community.llms import HuggingFacePipeline import torch def load_llm(model_path): tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained( model_path, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True ) pipe = pipeline( "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=1024, temperature=0.2, repetition_penalty=1.15 ) return HuggingFacePipeline(pipeline=pipe)