RichardLu committed (verified)
Commit 21274a1 · 1 Parent(s): a5bea71

Update handler.py

Files changed (1):
  1. handler.py (+11, -16)

handler.py CHANGED
@@ -2,36 +2,38 @@ import os
 from typing import Dict, List, Any
 import torch
 from unsloth import FastLanguageModel
-from transformers import AutoTokenizer
 
 class EndpointHandler:
     def __init__(self, model_dir: str = ""):
+        print(f"[DEBUG] Original model_dir: {model_dir}")
         # Retrieve the Hugging Face token from the environment variable.
         hf_token = os.getenv("HF_TOKEN", None)
 
-        # If the provided model_dir is the default '/repository' (or empty), override it.
+        # If the provided model_dir is empty or default, override it.
         if not model_dir or model_dir.strip() == "" or model_dir == "/repository":
             model_dir = "RichardLu/Mistral7b_AE_res"
+        print(f"[DEBUG] Using model_dir: {model_dir}")
 
         # Configuration for inference.
         max_seq_length = 2048
-        dtype = None  # Auto-detect data type; adjust if needed.
-        load_in_4bit = True  # Use 4-bit quantizationa to reduce memory usage.
+        dtype = None  # Auto-detect data type.
+        load_in_4bit = True
 
-        # Load the model and tokenizer with proper parameters.
+        # Load the model and tokenizer with custom settings.
         self.model, self.tokenizer = FastLanguageModel.from_pretrained(
             model_name=model_dir,
             max_seq_length=max_seq_length,
             dtype=dtype,
             load_in_4bit=load_in_4bit,
-            token=hf_token,  # Use the HF token if provided.
-            trust_remote_code=True  # Ensure that custom model code is allowed.
+            token=hf_token,
+            trust_remote_code=True
         )
+        print("[DEBUG] Model and tokenizer loaded successfully.")
 
-        # Switch the model to inference mode.
+        # Set the model to inference mode.
        FastLanguageModel.for_inference(self.model)
 
-        # Define the instruction text with examples for aspect extraction.
+        # Define the instruction text for aspect extraction.
         self.instructabsa_instruction = (
             "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. "
             "In cases where there are no aspects the output should be noaspectterm.\n"
@@ -66,23 +68,16 @@ class EndpointHandler:
         )
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-        # Retrieve the input text from the request payload.
         input_text = data.get("inputs", "")
         if not input_text:
             return [{"error": "No input provided."}]
 
-        # Build the final prompt using the template.
         prompt = self.alpaca_prompt.format(self.instructabsa_instruction, input_text, "")
-
-        # Set device.
         device = "cuda" if torch.cuda.is_available() else "cpu"
         inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
-
-        # Generate the model output.
         output_ids = self.model.generate(**inputs, max_new_tokens=128)
         output_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-        # Extract the predicted aspects from the generated output.
         if "### Response:" in output_text:
             predicted_aspects = output_text.split("### Response:")[-1].strip()
         else:
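
As `__call__` shows, the handler's request contract is a dict with an `inputs` key. A minimal local smoke test, assuming `handler.py` is importable from the working directory and enough GPU memory is available for the 4-bit load:

```python
# Local smoke test of the payload contract shown in __call__.
# The example review text is illustrative; the exact response shape
# depends on code past the end of the hunk.
from handler import EndpointHandler

handler = EndpointHandler()  # empty model_dir falls back to RichardLu/Mistral7b_AE_res
result = handler({"inputs": "The battery lasts all day but the screen is dim."})
print(result)
```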
 
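Once deployed as a Hugging Face Inference Endpoint, the same `{"inputs": ...}` payload is sent over HTTP. A sketch with a placeholder endpoint URL (the real URL is account-specific) and the same `HF_TOKEN` environment variable the handler already relies on:

```python
# Hypothetical client call; ENDPOINT_URL is a placeholder, not a real endpoint.
import os
import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}
payload = {"inputs": "The pasta was great but the service was slow."}

response = requests.post(ENDPOINT_URL, headers=headers, json=payload)
print(response.json())
```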