Update handler.py
handler.py +11 -16
handler.py CHANGED
@@ -2,36 +2,38 @@ import os
 from typing import Dict, List, Any
 import torch
 from unsloth import FastLanguageModel
-from transformers import AutoTokenizer

 class EndpointHandler:
     def __init__(self, model_dir: str = ""):
+        print(f"[DEBUG] Original model_dir: {model_dir}")
         # Retrieve the Hugging Face token from the environment variable.
         hf_token = os.getenv("HF_TOKEN", None)

-        # If the provided model_dir is
+        # If the provided model_dir is empty or default, override it.
         if not model_dir or model_dir.strip() == "" or model_dir == "/repository":
             model_dir = "RichardLu/Mistral7b_AE_res"
+        print(f"[DEBUG] Using model_dir: {model_dir}")

         # Configuration for inference.
         max_seq_length = 2048
-        dtype = None
-        load_in_4bit = True
+        dtype = None  # Auto-detect data type.
+        load_in_4bit = True

-        # Load the model and tokenizer with
+        # Load the model and tokenizer with custom settings.
         self.model, self.tokenizer = FastLanguageModel.from_pretrained(
             model_name=model_dir,
             max_seq_length=max_seq_length,
             dtype=dtype,
             load_in_4bit=load_in_4bit,
-            token=hf_token,
-            trust_remote_code=True
+            token=hf_token,
+            trust_remote_code=True
         )
+        print("[DEBUG] Model and tokenizer loaded successfully.")

-        #
+        # Set the model to inference mode.
         FastLanguageModel.for_inference(self.model)

-        # Define the instruction text
+        # Define the instruction text for aspect extraction.
         self.instructabsa_instruction = (
             "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. "
             "In cases where there are no aspects the output should be noaspectterm.\n"
@@ -66,23 +68,16 @@
         )

     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-        # Retrieve the input text from the request payload.
         input_text = data.get("inputs", "")
         if not input_text:
             return [{"error": "No input provided."}]

-        # Build the final prompt using the template.
         prompt = self.alpaca_prompt.format(self.instructabsa_instruction, input_text, "")
-
-        # Set device.
         device = "cuda" if torch.cuda.is_available() else "cpu"
         inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
-
-        # Generate the model output.
         output_ids = self.model.generate(**inputs, max_new_tokens=128)
         output_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)

-        # Extract the predicted aspects from the generated output.
         if "### Response:" in output_text:
             predicted_aspects = output_text.split("### Response:")[-1].strip()
         else:
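
A note on the prompt template: self.alpaca_prompt is defined in lines elided from this diff, but the three .format() slots and the later split on "### Response:" suggest the standard Alpaca template used in Unsloth's fine-tuning notebooks. A minimal sketch, assuming that layout (the template text below is an assumption, not copied from this repo):

alpaca_prompt = (
    # Assumed standard Alpaca preamble; the repo's actual wording may differ.
    "Below is an instruction that describes a task, paired with an input "
    "that provides further context. Write a response that appropriately "
    "completes the request.\n\n"
    "### Instruction:\n{}\n\n"
    "### Input:\n{}\n\n"
    "### Response:\n{}"
)

# __call__ fills the instruction and input slots and leaves the response
# slot empty, so everything the model generates lands after "### Response:"
# and can be recovered with the split shown in the diff above.
prompt = alpaca_prompt.format("<instruction>", "<input text>", "")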
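
For a quick local smoke test before deploying, the handler can be driven directly: Inference Endpoints construct EndpointHandler once and then call it with the request payload, whose "inputs" key carries the text, matching what __call__ reads. A minimal sketch; the example sentence is illustrative, and the exact keys of the returned dicts depend on the return statement that follows the else: branch above:

from handler import EndpointHandler

# An empty model_dir (or the endpoint default "/repository") makes the
# handler fall back to the hosted repo "RichardLu/Mistral7b_AE_res".
handler = EndpointHandler()

payload = {"inputs": "The battery lasts all day but the screen is too dim."}
result = handler(payload)  # List[Dict[str, Any]], per the annotation
print(result)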