Machlovi committed on
Commit
98e5726
·
verified ·
1 Parent(s): a8eb09f

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +32 -0
handler.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
import unsloth
from transformers import AutoTokenizer, pipeline
from peft import AutoPeftModelForCausalLM

# Base model: Unsloth's pre-quantized 4-bit Phi-4.
MODEL_NAME = "unsloth/Phi-4-unsloth-bnb-4bit"
# LoRA fine-tuned adapter to apply on top of the base model.
LORA_ADAPTER = "Machlovi/Safe_Phi4"
def load_model():
    """Load the 4-bit base model with its LoRA adapter via Unsloth and
    wrap the pair in a Hugging Face text-generation pipeline.

    Returns:
        transformers.Pipeline: a ready-to-use "text-generation" pipeline.
    """
    print("Loading base model with Unsloth...")

    # BUG FIX: `unsloth.load_peft` is not part of the Unsloth API and raises
    # AttributeError. The supported entry point is
    # FastLanguageModel.from_pretrained; when given a PEFT/LoRA adapter repo
    # it resolves the base model from the adapter's config and applies the
    # adapter on top automatically.
    model, tokenizer = unsloth.FastLanguageModel.from_pretrained(
        model_name=LORA_ADAPTER,  # adapter repo; base model resolved from its config
        max_seq_length=4096,      # adjust to your context-length needs
        dtype=torch.float16,
        load_in_4bit=True,        # keep weights in 4-bit to save VRAM
    )

    # Switch Unsloth into its optimized inference mode (no gradients needed).
    unsloth.FastLanguageModel.for_inference(model)

    print("Creating text generation pipeline...")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
26
+
# Build the pipeline once at import time so repeated requests reuse the
# already-loaded model instead of reloading it per call.
pipe = load_model()
29
+
def infer(prompt: str, max_new_tokens=128):
    """Generate a completion for *prompt* with the globally loaded pipeline.

    Args:
        prompt: input text for the model.
        max_new_tokens: cap on the number of tokens to generate (default 128).

    Returns:
        str: the generated text of the first (only) pipeline output.
    """
    outputs = pipe(prompt, max_new_tokens=max_new_tokens)
    return outputs[0]['generated_text']