GilbertAkham committed
Commit 730c68d · verified · 1 Parent(s): 4b8bb26

Update handler.py

Files changed (1): handler.py (+14 −5)
handler.py CHANGED

@@ -3,27 +3,34 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
-BASE_MODEL = "deepseek-ai/deepseek-coder-6.7b-instruct"  # or your real base
+# Base model that your LoRA was trained on (must match training)
+BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"  # change if you trained on a different DeepSeek variant
 ADAPTER_PATH = "GilbertAkham/deepseek-R1-multitask-lora"
 
 class EndpointHandler:
     def __init__(self, path=""):
-        print("Loading base model...")
+        print("🚀 Loading base model...")
         self.tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
+
+        # Load base model
         base_model = AutoModelForCausalLM.from_pretrained(
             BASE_MODEL,
             torch_dtype=torch.float16,
             device_map="auto",
             trust_remote_code=True
         )
-        print("Attaching LoRA adapter...")
+
+        print(f"🔗 Attaching LoRA adapter from {ADAPTER_PATH}...")
+        # Load the LoRA adapter properly
         self.model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
         self.model.eval()
-        print("✅ LoRA adapter loaded successfully.")
+
+        print("✅ Model + LoRA adapter loaded successfully.")
 
     def __call__(self, data):
         prompt = data.get("inputs", "")
         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
@@ -34,4 +41,6 @@ class EndpointHandler:
                 pad_token_id=self.tokenizer.eos_token_id,
                 eos_token_id=self.tokenizer.eos_token_id,
             )
-        return {"generated_text": self.tokenizer.decode(outputs[0], skip_special_tokens=True)}
+
+        text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return {"generated_text": text}