RichardLu committed on
Commit 88198cf · verified · 1 Parent(s): 93aafb9

Update handler.py

Files changed (1)
  1. handler.py  +10 -6
handler.py CHANGED
@@ -9,20 +9,23 @@ class EndpointHandler:
         # Retrieve the Hugging Face token from the environment variable.
         hf_token = os.getenv("HF_TOKEN", None)
 
-        # Configuration for inference
+        # If the endpoint provides '/repository', override with the intended model ID.
+        if model_dir == "/repository" or model_dir.strip() == "":
+            model_dir = "RichardLu/Mistral7b_AE_res"
+
+        # Configuration for inference.
         max_seq_length = 2048
         dtype = None  # Auto-detect data type; adjust if needed.
         load_in_4bit = True  # Use 4-bit quantization to reduce memory usage.
 
         # Load the model and tokenizer.
-        # If a model_dir is provided by the endpoint, it will load from that path;
-        # otherwise, it uses a default UnsLoth Mistral model.
         self.model, self.tokenizer = FastLanguageModel.from_pretrained(
-            model_name=RichardLu/Mistral7b_AE_res,
+            model_name=model_dir,
             max_seq_length=max_seq_length,
             dtype=dtype,
             load_in_4bit=load_in_4bit,
-            token=hf_token  # Pass the HF token if provided.
+            token=hf_token,  # Use the HF token if provided.
+            trust_remote_code=True  # Enable if your model requires remote code.
         )
 
         # Switch the model to inference mode.
@@ -30,7 +33,8 @@ class EndpointHandler:
 
         # Define the instruction text with examples for aspect extraction.
         self.instructabsa_instruction = (
-            "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. In cases where there are no aspects the output should be noaspectterm.\n"
+            "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. "
+            "In cases where there are no aspects the output should be noaspectterm.\n"
             "Positive example 1-\n"
             "input: With the great variety on the menu, I eat here often and never get bored.\n"
             "output: menu\n"