Spaces:

Monimoy
/

fine_tuned_phi2_model

Runtime error

Monimoy committed on Mar 9, 2025

Commit

1ac6fd4

verified ·

1 Parent(s): 4ebadf9

Upload app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,7 +15,8 @@ peft_model_path = "./phi2-openassistant-lora-final"
 # Load the base model with 4-bit quantization
 #bnb_config = BitsAndBytesConfig(load_in_4bit=True)  # Ensure compatibility
 #base_model = AutoModelForCausalLM.from_pretrained(base_model_name, quantization_config=bnb_config, device_map={"": device})
-base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32, device_map={"": device})
 # Load LoRA adapter
 model = PeftModel.from_pretrained(base_model, peft_model_path)
@@ -23,6 +24,7 @@ model = model.merge_and_unload()  # Merge LoRA with base model
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 # Define prediction function
 def generate_response(prompt):

 # Load the base model with 4-bit quantization
 #bnb_config = BitsAndBytesConfig(load_in_4bit=True)  # Ensure compatibility
 #base_model = AutoModelForCausalLM.from_pretrained(base_model_name, quantization_config=bnb_config, device_map={"": device})
+#base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32, device_map={"": device})
+base_model = AutoModelForCausalLM.from_pretrained(base_model_name, load_in_4bit=True, device_map={"": device})
 # Load LoRA adapter
 model = PeftModel.from_pretrained(base_model, peft_model_path)
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+tokenizer.pad_token = tokenizer.eos_token
 # Define prediction function
 def generate_response(prompt):