Machlovi committed on
Commit
2ed974c
·
verified ·
1 Parent(s): d7bb283

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +24 -0
handler.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Smoke test for the fine-tuned Gemma 3 LoRA adapter.

Loads the 4-bit base model through Unsloth, attaches the LoRA adapter with
PEFT, generates a short continuation for one fixed prompt, and prints the
decoded text. Everything runs at import time, and a CUDA device is required
because the tokenized inputs are moved to "cuda".
"""

# NOTE(review): the import order is kept exactly as in the original, with
# unsloth first; confirm before regrouping these imports.
from unsloth import FastLanguageModel
from peft import PeftModel
import torch

# Load the base model
base_model_name = "unsloth/gemma-3-12b-it-bnb-4bit"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model_name,
    max_seq_length=4096,  # Must match fine-tuning
    load_in_4bit=True,
)

# Load the fine-tuned LoRA adapter on top of the base model
lora_model_name = "Machlovi/Gemma3_12_MegaHateCatplus"
model = PeftModel.from_pretrained(model, lora_model_name)

# Tokenize the fixed prompt and move the resulting tensors to the GPU
input_text = "Why do we need to go to see something?"
inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

# Gradients are not needed for generation, so disable autograd tracking
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=4)

# Decode and print response
# NOTE(review): outputs[0] presumably holds the prompt tokens followed by the
# newly generated ones, so the printed text would start with the prompt —
# confirm against the generate() documentation.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)