joseAndres777 committed on
Commit
c4c2bd8
·
verified ·
1 Parent(s): 0a3560b

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +16 -5
handler.py CHANGED
@@ -11,15 +11,26 @@ class EndpointHandler:
11
  # Load tokenizer
12
  self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.3-70B-Instruct")
13
 
14
- # Load base model
15
  base_model = AutoModelForCausalLM.from_pretrained(
16
  "meta-llama/Llama-3.3-70B-Instruct",
17
  torch_dtype=torch.float16,
18
- device_map="auto"
 
19
  )
20
-
21
- # Load LoRA adapters from the current path
22
- self.model = PeftModel.from_pretrained(base_model, path)
 
 
 
 
 
 
 
 
 
 
23
 
24
  # Load chat template
25
  try:
 
11
  # Load tokenizer
12
  self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.3-70B-Instruct")
13
 
14
+ # Load base model - CRITICAL: Match training setup exactly
15
  base_model = AutoModelForCausalLM.from_pretrained(
16
  "meta-llama/Llama-3.3-70B-Instruct",
17
  torch_dtype=torch.float16,
18
+ device_map="auto", # Use auto for compatibility
19
+ trust_remote_code=True
20
  )
21
+
22
+ # Load LoRA adapters - use force download to ensure fresh state
23
+ try:
24
+ self.model = PeftModel.from_pretrained(
25
+ base_model,
26
+ path,
27
+ is_trainable=False # Inference mode
28
+ )
29
+ except Exception as e:
30
+ print(f"Error loading adapter: {e}")
31
+ # Fallback: try without adapter (base model only)
32
+ print("Falling back to base model without adapter")
33
+ self.model = base_model
34
 
35
  # Load chat template
36
  try: