syberWolf committed on
Commit
87f7af9
·
1 Parent(s): ef898a1
Files changed (1) hide show
  1. handler.py +7 -3
handler.py CHANGED
@@ -6,13 +6,17 @@ import os
6
  class EndpointHandler:
7
  def __init__(self, model_path=""):
8
  # Construct the model path assuming the model is in the same directory as the handler file
9
- # script_dir = os.path.dirname(os.path.abspath(__file__))
10
  model_filename = "Phi-3-medium-128k-instruct-IQ2_XS.gguf"
11
- # self.model_path = os.path.join(script_dir, model_path)
 
 
 
 
12
 
13
  # Load the GGUF model using llama_cpp
14
  self.llm = Llama(
15
- model_path=model_filename,
16
  n_ctx=5000, # Set context length to 5000 tokens
17
  # n_threads=12, # Adjust the number of CPU threads as per your machine
18
  n_gpu_layers=-1 # Adjust based on GPU availability
 
6
  class EndpointHandler:
7
  def __init__(self, model_path=""):
8
  # Construct the model path assuming the model is in the same directory as the handler file
9
+ script_dir = os.path.dirname(os.path.abspath(__file__))
10
  model_filename = "Phi-3-medium-128k-instruct-IQ2_XS.gguf"
11
+ self.model_path = os.path.join(script_dir, model_filename)
12
+
13
+ # Check if the model file exists
14
+ if not os.path.exists(self.model_path):
15
+ raise ValueError(f"Model path does not exist: {self.model_path}")
16
 
17
  # Load the GGUF model using llama_cpp
18
  self.llm = Llama(
19
+ model_path=self.model_path,
20
  n_ctx=5000, # Set context length to 5000 tokens
21
  # n_threads=12, # Adjust the number of CPU threads as per your machine
22
  n_gpu_layers=-1 # Adjust based on GPU availability