GilbertAkham committed on
Commit
8192254
·
verified ·
1 Parent(s): 730c68d

Delete handler.py

Browse files
Files changed (1) hide show
  1. handler.py +0 -46
handler.py DELETED
@@ -1,46 +0,0 @@
1
- # handler.py
2
- import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- from peft import PeftModel
5
-
6
# Identifier of the base checkpoint the LoRA adapter was trained on. It must
# match the model used during training; change it if the adapter was trained
# on a different DeepSeek variant.
BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Identifier of the LoRA adapter that gets attached on top of BASE_MODEL.
ADAPTER_PATH = "GilbertAkham/deepseek-R1-multitask-lora"
9
-
10
class EndpointHandler:
    """Text-generation handler: base causal LM with a LoRA adapter attached.

    The class follows the ``__init__(path)`` / ``__call__(data)`` shape used
    by Hugging Face Inference Endpoints custom handlers. The model and
    tokenizer are loaded once at construction time and reused per request.
    """

    # Sampling defaults applied when a request does not override them.
    _DEFAULT_GENERATION_KWARGS = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True,
    }

    def __init__(self, path=""):
        """Load the tokenizer, the base model, and the LoRA adapter.

        Args:
            path: Accepted for interface compatibility; it is not used. The
                weights are always resolved from ``BASE_MODEL`` and
                ``ADAPTER_PATH``.
        """
        print("🚀 Loading base model...")
        self.tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

        # Half precision is only a good fit on GPU; with no CUDA device the
        # model ends up on CPU, where float16 kernels are slow or missing.
        dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        # Load base model
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=dtype,
            device_map="auto",
            trust_remote_code=True,
        )

        print(f"🔗 Attaching LoRA adapter from {ADAPTER_PATH}...")
        # Wrap the base model so the adapter weights take part in generation.
        self.model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
        self.model.eval()

        print("✅ Model + LoRA adapter loaded successfully.")

    def __call__(self, data):
        """Generate text for one request.

        Args:
            data: Request mapping. ``data["inputs"]`` is the prompt (defaults
                to ``""``). Optional ``data["parameters"]`` is a mapping of
                keyword arguments forwarded to ``generate``; its entries
                override the built-in defaults.

        Returns:
            ``{"generated_text": <decoded first output sequence>}``. The
            whole sequence returned by ``generate`` is decoded with special
            tokens stripped.

        Raises:
            ValueError: If ``data["parameters"]`` is present but not a mapping.
        """
        prompt = data.get("inputs", "")

        overrides = data.get("parameters") or {}
        if not isinstance(overrides, dict):
            raise ValueError('"parameters" must be a mapping of generation options')

        # Defaults first, request overrides last, so callers can tune any
        # setting while plain requests behave exactly as before.
        generation_kwargs = {
            **self._DEFAULT_GENERATION_KWARGS,
            "pad_token_id": self.tokenizer.eos_token_id,
            "eos_token_id": self.tokenizer.eos_token_id,
            **overrides,
        }

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = self.model.generate(**inputs, **generation_kwargs)

        text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return {"generated_text": text}