RanjithaRuttala committed on
Commit
1987c4f
·
verified ·
1 Parent(s): c8723a7

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +15 -6
handler.py CHANGED
@@ -1,6 +1,6 @@
1
  import torch
2
- from unsloth import FastLanguageModel
3
- from transformers import AutoTokenizer
4
 
5
  # === Config ===
6
  MODEL_DIR = "./" # Replace with path to your merged model folder on HF
@@ -12,11 +12,20 @@ TOP_P = 0.95
12
  # === Load merged model and tokenizer ===
13
  print("[Handler] Loading model and tokenizer...")
14
  # FastLanguageModel can load merged PEFT models directly
15
- model, tokenizer = FastLanguageModel.from_pretrained(
16
- model_name=MODEL_DIR,
17
- dtype=torch.float16,
18
- load_in_4bit=False
 
 
 
 
 
 
 
 
19
  )
 
20
  model = model.to(DEVICE)
21
  model.eval()
22
 
 
1
  import torch
2
+ # from unsloth import FastLanguageModel
3
+ from transformers import AutoTokenizer,AutoModelForCausalLM
4
 
5
  # === Config ===
6
  MODEL_DIR = "./" # Replace with path to your merged model folder on HF
 
12
  # === Load merged model and tokenizer ===
13
  print("[Handler] Loading model and tokenizer...")
14
  # FastLanguageModel can load merged PEFT models directly
15
+ # model, tokenizer = FastLanguageModel.from_pretrained(
16
+ # model_name=MODEL_DIR,
17
+ # dtype=torch.float16,
18
+ # load_in_4bit=False
19
+ # )
20
+
21
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
22
+
23
+ model = AutoModelForCausalLM.from_pretrained(
24
+ MODEL_DIR,
25
+ torch_dtype=torch.float16,
26
+ device_map="auto"
27
  )
28
+
29
  model = model.to(DEVICE)
30
  model.eval()
31