Machlovi
/

Safe_Phi4_Full2

Model card Files Files and versions

Machlovi committed on Apr 4, 2025

Commit

98e5726

·

verified ·

1 Parent(s): a8eb09f

Create handler.py

Files changed (1) hide show

handler.py +32 -0

handler.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import torch
+import unsloth
+from transformers import AutoTokenizer, pipeline
+from peft import AutoPeftModelForCausalLM
+MODEL_NAME = "unsloth/Phi-4-unsloth-bnb-4bit"  # Hub id of the base model
+LORA_ADAPTER =  "Machlovi/Safe_Phi4" # Hub id of the LoRA fine-tuned adapter
+def load_model():
+    """Loads the base model and LoRA adapter using Unsloth."""
+    print("Loading base model with Unsloth...")
+    # Use Unsloth to load model in 4-bit efficiently
+    model, tokenizer = unsloth.load_peft(
+        model_name=MODEL_NAME,
+        peft_model= LORA_ADAPTER,
+        load_in_4bit=True,  # Ensure it's 4-bit
+        max_seq_length=4096,  # Adjust as per your needs
+        dtype=torch.float16,
+    )
+    print("Creating text generation pipeline...")
+    text_gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+    return text_gen_pipeline
+# Build the pipeline once at import time so every infer() call reuses it
+# instead of reloading the model on each request.
+pipe = load_model()
+def infer(prompt: str, max_new_tokens=128):
+    """Generate text using the Unsloth LoRA-adapted model."""
+    return pipe(prompt, max_new_tokens=max_new_tokens)[0]['generated_text']