RichardLu committed on
Commit 88198cf · verified · 1 Parent(s): 93aafb9

Update handler.py

Files changed (1)
  1. handler.py  +10 -6
handler.py CHANGED
@@ -9,20 +9,23 @@ class EndpointHandler:
         # Retrieve the Hugging Face token from the environment variable.
         hf_token = os.getenv("HF_TOKEN", None)
 
-        # Configuration for inference
+        # If the endpoint provides '/repository', override with the intended model ID.
+        if model_dir == "/repository" or model_dir.strip() == "":
+            model_dir = "RichardLu/Mistral7b_AE_res"
+
+        # Configuration for inference.
         max_seq_length = 2048
         dtype = None  # Auto-detect data type; adjust if needed.
         load_in_4bit = True  # Use 4-bit quantization to reduce memory usage.
 
         # Load the model and tokenizer.
-        # If a model_dir is provided by the endpoint, it will load from that path;
-        # otherwise, it uses a default UnsLoth Mistral model.
         self.model, self.tokenizer = FastLanguageModel.from_pretrained(
-            model_name=RichardLu/Mistral7b_AE_res,
+            model_name=model_dir,
             max_seq_length=max_seq_length,
             dtype=dtype,
             load_in_4bit=load_in_4bit,
-            token=hf_token  # Pass the HF token if provided.
+            token=hf_token,  # Use the HF token if provided.
+            trust_remote_code=True  # Enable if your model requires remote code.
         )
 
         # Switch the model to inference mode.
@@ -30,7 +33,8 @@ class EndpointHandler:
 
         # Define the instruction text with examples for aspect extraction.
         self.instructabsa_instruction = (
-            "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. In cases where there are no aspects the output should be noaspectterm.\n"
+            "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. "
+            "In cases where there are no aspects the output should be noaspectterm.\n"
             "Positive example 1-\n"
             "input: With the great variety on the menu, I eat here often and never get bored.\n"
             "output: menu\n"