from peft import PeftModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL = "bigcode/starcoderbase-1b"  # Model checkpoint on the Hugging Face Hub

# Load the tokenizer for the base checkpoint.
print("Load Tokenizer")
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Load the original (base) model in full precision on the GPU.
print("Load Model")
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    quantization_config=None,
    device_map=None,
    trust_remote_code=True,
    torch_dtype=torch.float32,
).cuda()

# Merge the fine-tuned LoRA weights into the base model.
peft_model_id = "limernyou/starcoder-peft-conti"
model = PeftModel.from_pretrained(base_model, peft_model_id, adapter_name="personal_copilot")
# Optionally down-weight the adapter before merging:
# model.add_weighted_adapter(["personal_copilot"], [0.8], "best_personal_copilot")
# model.set_adapter("best_personal_copilot")
model = model.merge_and_unload()  # merge_and_unload() returns the merged model


def get_code_completion(prefix, suffix):
    # StarCoder fill-in-the-middle (FIM) prompt format.
    prompt = f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            temperature=0.2,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.0,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Return the full decoded sequence; keep special tokens so the FIM
    # structure stays visible in the printed output.
    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]


def get_code_completion1(prefix, suffix):
    # Same FIM prompt, but return only the generated middle segment.
    prompt = f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            temperature=0.2,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.0,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Keep special tokens so the <fim_middle> marker survives for splitting,
    # then strip the prompt and the end-of-text marker.
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    completion = output_text.split("<fim_middle>")[-1].replace("<|endoftext|>", "").strip()
    return completion


prefix = """from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM
peft_config = LoraConfig(
"""
suffix = """"""

print("Starcoder generating response")
# print(tokenizer.special_tokens_map)
print(get_code_completion(prefix, suffix))
print("Successful")
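
# get_code_completion1 above is defined but never exercised; a minimal usage
# sketch follows. It reuses the same prefix/suffix pair and assumes the
# middle-only extraction is the desired output (i.e. the text an editor
# plugin would insert at the cursor, without echoing the prompt).
print("Starcoder generating response (middle segment only)")
print(get_code_completion1(prefix, suffix))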