Monimoy committed
Commit 1ac6fd4 · verified · 1 Parent(s): 4ebadf9

Upload app.py

Files changed (1)
  1. app.py +3 -1
app.py CHANGED
@@ -15,7 +15,8 @@ peft_model_path = "./phi2-openassistant-lora-final"
 # Load the base model with 4-bit quantization
 #bnb_config = BitsAndBytesConfig(load_in_4bit=True) # Ensure compatibility
 #base_model = AutoModelForCausalLM.from_pretrained(base_model_name, quantization_config=bnb_config, device_map={"": device})
-base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32, device_map={"": device})
+#base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32, device_map={"": device})
+base_model = AutoModelForCausalLM.from_pretrained(base_model_name, load_in_4bit=True, device_map={"": device})
 
 # Load LoRA adapter
 model = PeftModel.from_pretrained(base_model, peft_model_path)
@@ -23,6 +24,7 @@ model = model.merge_and_unload() # Merge LoRA with base model
 
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+tokenizer.pad_token = tokenizer.eos_token
 
 # Define prediction function
 def generate_response(prompt):
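
For context, the commented-out lines in this hunk point at the same 4-bit path via a BitsAndBytesConfig, which newer transformers releases prefer over passing the bare load_in_4bit flag to from_pretrained. Below is a minimal sketch of that loading path, not the exact app.py: the base model name and the single-device device_map are assumptions (only peft_model_path appears in this diff), and 4-bit loading requires a CUDA device with bitsandbytes installed.

# Minimal sketch (assumptions noted): 4-bit base model load via BitsAndBytesConfig,
# LoRA adapter attach, and pad-token setup as done in this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_model_name = "microsoft/phi-2"            # assumed base model; not shown in this diff
peft_model_path = "./phi2-openassistant-lora-final"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,      # compute dtype used by the 4-bit kernels
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map={"": 0},                        # assumed single-GPU placement
)

# Attach the LoRA adapter on top of the quantized base model
model = PeftModel.from_pretrained(base_model, peft_model_path)

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token = tokenizer.eos_token      # the tokenizer defines no pad token, so reuse EOS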