File size: 4,109 Bytes
91a59cb
3eec07e
956a6ac
 
 
3eec07e
 
21274a1
91a59cb
 
 
21274a1
9f5b21d
 
21274a1
88198cf
 
3eec07e
21274a1
 
91a59cb
21274a1
3eec07e
88198cf
3eec07e
 
91a59cb
21274a1
 
3eec07e
21274a1
3eec07e
21274a1
3eec07e
91a59cb
21274a1
3eec07e
88198cf
 
3eec07e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91a59cb
3eec07e
 
d1b9f7e
 
3eec07e
 
 
 
956a6ac
3eec07e
 
 
 
91a59cb
3eec07e
 
 
 
 
91a59cb
3eec07e
 
 
 
91a59cb
3eec07e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
from typing import Dict, List, Any
import torch
from unsloth import FastLanguageModel

class EndpointHandler:
    """HF Inference Endpoints handler for aspect-term extraction (ABSA).

    Loads a 4-bit quantized causal LM via Unsloth at startup. Each request
    formats the input text into an Alpaca-style prompt carrying a fixed
    few-shot InstructABSA instruction, generates a completion, and returns
    the extracted aspect terms.
    """

    # Fallback repo used when the runtime supplies no usable model path.
    # NOTE(review): HF Inference Endpoints mounts the repo at "/repository";
    # presumably that mount was unusable here, hence the override — confirm.
    DEFAULT_MODEL_REPO = "RichardLu/Mistral7b_AE_res"

    def __init__(self, model_dir: str = "") -> None:
        """Load the model and tokenizer and prepare prompt templates.

        Args:
            model_dir: Local path or HF repo id of the model. An empty or
                whitespace-only value, or the endpoint default
                "/repository", falls back to DEFAULT_MODEL_REPO.
        """
        print(f"[DEBUG] Original model_dir: {model_dir}")
        # Token for gated/private repos; None is fine for public ones.
        hf_token = os.getenv("HF_TOKEN", None)

        # BUGFIX: this guard was commented out, so a caller-supplied
        # model_dir was always discarded. Fall back to the default repo
        # only when no usable directory was provided.
        if not model_dir or model_dir.strip() == "" or model_dir == "/repository":
            model_dir = self.DEFAULT_MODEL_REPO
        print(f"[DEBUG] Using model_dir: {model_dir}")

        # Configuration for inference. max_seq_length is kept on self so
        # __call__ can truncate request prompts to the same context window.
        self.max_seq_length = 2048
        dtype = None  # Auto-detect data type.
        load_in_4bit = True  # 4-bit quantization to fit on small GPUs.

        # Load the model and tokenizer with custom settings.
        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_dir,
            max_seq_length=self.max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
            token=hf_token,
            trust_remote_code=True,
        )
        print("[DEBUG] Model and tokenizer loaded successfully.")

        # Switch Unsloth kernels to inference mode.
        FastLanguageModel.for_inference(self.model)

        # Fixed few-shot instruction text for aspect extraction
        # (InstructABSA style: positive / negative / neutral examples).
        self.instructabsa_instruction = (
            "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. "
            "In cases where there are no aspects the output should be noaspectterm.\n"
            "Positive example 1-\n"
            "input: With the great variety on the menu, I eat here often and never get bored.\n"
            "output: menu\n"
            "Positive example 2-\n"
            "input: Great food, good size menu, great service and an unpretensious setting.\n"
            "output: food, menu, service, setting\n"
            "Negative example 1-\n"
            "input: They did not have mayonnaise, forgot our toast, left out ingredients (ie cheese in an omelet), below hot temperatures and the bacon was so over cooked it crumbled on the plate when you touched it.\n"
            "output: toast, mayonnaise, bacon, ingredients, plate\n"
            "Negative example 2-\n"
            "input: The seats are uncomfortable if you are sitting against the wall on wooden benches.\n"
            "output: seats\n"
            "Neutral example 1-\n"
            "input: I asked for seltzer with lime, no ice.\n"
            "output: seltzer with lime\n"
            "Neutral example 2-\n"
            "input: They wouldnt even let me finish my glass of wine before offering another.\n"
            "output: glass of wine\n"
            "Now complete the following example:"
        )

        # Alpaca-style prompt template: instruction, input, empty response
        # slot that the model is expected to complete.
        self.alpaca_prompt = (
            "Below is an instruction that describes a task, paired with an input that provides further context. "
            "Write a response that appropriately completes the request.\n"
            "### Instruction:\n{}\n"
            "### Input:\n{}\n"
            "### Response:\n{}"
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle one inference request.

        Args:
            data: Request payload; the text to analyze is under "inputs".

        Returns:
            ``[{"predicted": <aspect terms>}]`` on success, or
            ``[{"error": ...}]`` when no input text was provided.
        """
        input_text = data.get("inputs", "")
        if not input_text:
            return [{"error": "No input provided."}]

        prompt = self.alpaca_prompt.format(self.instructabsa_instruction, input_text, "")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Truncate explicitly to the model's context window (previously
        # truncation=True was passed without a max_length bound).
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=self.max_seq_length,
        ).to(device)
        output_ids = self.model.generate(**inputs, max_new_tokens=128)
        output_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # The decoded text echoes the full prompt; keep only what follows
        # the final "### Response:" marker.
        if "### Response:" in output_text:
            predicted_aspects = output_text.split("### Response:")[-1].strip()
        else:
            predicted_aspects = output_text.strip()

        return [{"predicted": predicted_aspects}]