joseAndres777 committed on
Commit
538a0aa
·
verified ·
1 Parent(s): 1e38b06

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +11 -5
handler.py CHANGED
@@ -12,15 +12,20 @@ class EndpointHandler:
12
  """
13
  Initialize the handler with the model from the given path
14
  """
 
 
 
15
  # Load tokenizer
16
- self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.3-70B-Instruct")
17
 
18
- # Load base model - CRITICAL: Match training setup exactly
19
  base_model = AutoModelForCausalLM.from_pretrained(
20
- "meta-llama/Llama-3.3-70B-Instruct",
21
  torch_dtype=torch.float16,
22
- device_map="auto", # Use auto for compatibility
23
- trust_remote_code=True
 
 
24
  )
25
 
26
  # Load LoRA adapters - use force download to ensure fresh state
@@ -30,6 +35,7 @@ class EndpointHandler:
30
  path,
31
  is_trainable=False # Inference mode
32
  )
 
33
  except Exception as e:
34
  print(f"Error loading adapter: {e}")
35
  # Fallback: try without adapter (base model only)
 
12
  """
13
  Initialize the handler with the model from the given path
14
  """
15
+ # Use original model that matches the trained adapter
16
+ model_name = "meta-llama/Llama-3.3-70B-Instruct"
17
+
18
  # Load tokenizer
19
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name)
20
 
21
+ # Load base model with memory optimization
22
  base_model = AutoModelForCausalLM.from_pretrained(
23
+ model_name,
24
  torch_dtype=torch.float16,
25
+ device_map="auto",
26
+ trust_remote_code=True,
27
+ load_in_8bit=True, # 8-bit quantization to save memory
28
+ low_cpu_mem_usage=True
29
  )
30
 
31
  # Load LoRA adapters - use force download to ensure fresh state
 
35
  path,
36
  is_trainable=False # Inference mode
37
  )
38
+ print("Successfully loaded adapter with base model")
39
  except Exception as e:
40
  print(f"Error loading adapter: {e}")
41
  # Fallback: try without adapter (base model only)