# handler.py for RichardLu/Mistral7b_AE_res (custom Hugging Face Inference Endpoints handler)
import os
from typing import Any, Dict, List

import torch
from unsloth import FastLanguageModel


class EndpointHandler:
    def __init__(self, model_dir: str = ""):
        # Retrieve the Hugging Face token from the environment variable.
        hf_token = os.getenv("HF_TOKEN", None)

        # Inference Endpoints mount the repository at "/repository"; fall back
        # to the intended model ID when that path (or an empty string) is given.
        if model_dir == "/repository" or model_dir.strip() == "":
            model_dir = "RichardLu/Mistral7b_AE_res"

        # Configuration for inference.
        max_seq_length = 2048
        dtype = None  # Auto-detect the data type; adjust if needed.
        load_in_4bit = True  # Use 4-bit quantization to reduce memory usage.
        self.max_seq_length = max_seq_length  # Kept for explicit truncation at inference time.

        # Load the model and tokenizer.
        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_dir,
            max_seq_length=max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
            token=hf_token,  # Use the HF token if provided.
            trust_remote_code=True,  # Enable if your model requires remote code.
        )

        # Switch the model to inference mode.
        FastLanguageModel.for_inference(self.model)

        # Define the instruction text with few-shot examples for aspect
        # extraction (the example sentences are kept verbatim).
        self.instructabsa_instruction = (
            "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. "
            "In cases where there are no aspects the output should be noaspectterm.\n"
            "Positive example 1-\n"
            "input: With the great variety on the menu, I eat here often and never get bored.\n"
            "output: menu\n"
            "Positive example 2-\n"
            "input: Great food, good size menu, great service and an unpretensious setting.\n"
            "output: food, menu, service, setting\n"
            "Negative example 1-\n"
            "input: They did not have mayonnaise, forgot our toast, left out ingredients (ie cheese in an omelet), below hot temperatures and the bacon was so over cooked it crumbled on the plate when you touched it.\n"
            "output: toast, mayonnaise, bacon, ingredients, plate\n"
            "Negative example 2-\n"
            "input: The seats are uncomfortable if you are sitting against the wall on wooden benches.\n"
            "output: seats\n"
            "Neutral example 1-\n"
            "input: I asked for seltzer with lime, no ice.\n"
            "output: seltzer with lime\n"
            "Neutral example 2-\n"
            "input: They wouldnt even let me finish my glass of wine before offering another.\n"
            "output: glass of wine\n"
            "Now complete the following example:"
        )
        # Define the Alpaca-style prompt template.
        self.alpaca_prompt = (
            "Below is an instruction that describes a task, paired with an input that provides further context. "
            "Write a response that appropriately completes the request.\n"
            "### Instruction:\n{}\n"
            "### Input:\n{}\n"
            "### Response:\n{}"
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Retrieve the input text from the request payload.
        input_text = data.get("inputs", "")
        if not input_text:
            return [{"error": "No input provided."}]

        # Build the final prompt from the template, leaving the response slot empty.
        prompt = self.alpaca_prompt.format(self.instructabsa_instruction, input_text, "")

        # Tokenize and move the inputs to the model's device.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=self.max_seq_length,  # Truncate explicitly to the configured context length.
        ).to(device)

        # Generate the model output (adjust max_new_tokens as needed).
        output_ids = self.model.generate(**inputs, max_new_tokens=128)
        output_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # The decoded text includes the prompt, so keep only what follows "### Response:".
        if "### Response:" in output_text:
            predicted_aspects = output_text.split("### Response:")[-1].strip()
        else:
            predicted_aspects = output_text.strip()

        # Return the response in a JSON-serializable format.
        return [{"predicted": predicted_aspects}]
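

# A minimal local smoke test, added as a sketch: Inference Endpoints construct
# EndpointHandler once and invoke it per request with a payload of the form
# {"inputs": "..."}, and the same contract can be exercised directly. The sample
# sentence and the expected output shown below are illustrative assumptions,
# not taken from the repository.
if __name__ == "__main__":
    handler = EndpointHandler()  # Empty model_dir falls back to "RichardLu/Mistral7b_AE_res".
    result = handler({"inputs": "The pasta was delicious but the waiter was rude."})
    print(result)  # Expected shape: [{"predicted": "<comma-separated aspect terms>"}]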