Create handler.py
Browse files- handler.py +90 -0
handler.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import torch
|
| 3 |
+
from unsloth import FastLanguageModel
|
| 4 |
+
|
# Global model and tokenizer variables, populated once by initialize() and
# read by preprocess()/inference()/postprocess().
model = None
tokenizer = None

# Define your InstructABSA instruction text (as used in your finetuning script)
# Few-shot prompt: a task definition followed by positive / negative / neutral
# worked examples of aspect-term extraction, ending with a lead-in line for the
# new input. Example strings are reproduced verbatim from the finetuning data
# (including spellings such as "unpretensious" and "wouldnt").
instructabsa_instruction = (
    "Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. In cases where there are no aspects the output should be noaspectterm.\n"
    "Positive example 1-\n"
    "input: With the great variety on the menu, I eat here often and never get bored.\n"
    "output: menu\n"
    "Positive example 2-\n"
    "input: Great food, good size menu, great service and an unpretensious setting.\n"
    "output: food, menu, service, setting\n"
    "Negative example 1-\n"
    "input: They did not have mayonnaise, forgot our toast, left out ingredients (ie cheese in an omelet), below hot temperatures and the bacon was so over cooked it crumbled on the plate when you touched it.\n"
    "output: toast, mayonnaise, bacon, ingredients, plate\n"
    "Negative example 2-\n"
    "input: The seats are uncomfortable if you are sitting against the wall on wooden benches.\n"
    "output: seats\n"
    "Neutral example 1-\n"
    "input: I asked for seltzer with lime, no ice.\n"
    "output: seltzer with lime\n"
    "Neutral example 2-\n"
    "input: They wouldnt even let me finish my glass of wine before offering another.\n"
    "output: glass of wine\n"
    "Now complete the following example:"
)

# Define the Alpaca-style prompt template that wraps the instruction.
# format() placeholders, in order: instruction text, input text, response
# (the response slot is left empty at inference time so the model fills it in).
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
    "### Instruction:\n{}\n"
    "### Input:\n{}\n"
    "### Response:\n{}"
)
def initialize(model_name="your-model-repo", max_seq_length=2048, load_in_4bit=True):
    """
    Load the finetuned unsloth model and tokenizer into the module globals.

    All parameters default to the previously hard-coded values, so existing
    ``initialize()`` callers are unaffected.

    Parameters
    ----------
    model_name : str
        Hugging Face repository name (or local path) of the finetuned model,
        e.g. "unsloth/mistral-7b-instruct-v0.3-bnb-4bit-finetuned".
    max_seq_length : int
        Maximum sequence length to load the model with.
    load_in_4bit : bool
        Whether to load the weights with 4-bit quantization.
    """
    global model, tokenizer
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        load_in_4bit=load_in_4bit,
    )
    # Switch unsloth's fast-inference path on, then put the underlying module
    # in eval mode (disables dropout etc.).
    FastLanguageModel.for_inference(model)
    model.eval()
def preprocess(request):
    """
    Build tokenized model inputs from an incoming HTTP request.

    Expects a JSON payload with an "inputs" field containing the raw review
    text. The text is wrapped in the InstructABSA instruction via the Alpaca
    prompt template, tokenized, and moved to the model's device.

    Parameters
    ----------
    request : object
        Request object exposing the raw JSON body as ``request.data`` bytes
        (Flask-style interface — confirm against the serving framework).

    Returns
    -------
    dict
        Tokenizer output tensors keyed by name, on the model's device.

    Raises
    ------
    RuntimeError
        If initialize() has not been called yet; previously this surfaced as
        an opaque ``'NoneType' object is not callable``.
    """
    if model is None or tokenizer is None:
        raise RuntimeError("Model not loaded; call initialize() first.")
    data = json.loads(request.data.decode("utf-8"))
    raw_text = data.get("inputs", "")
    # Response slot is left empty ("") so the model generates the completion.
    full_prompt = alpaca_prompt.format(instructabsa_instruction, raw_text, "")
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True)
    # Move every tensor onto the same device as the model parameters.
    device = next(model.parameters()).device
    return {key: tensor.to(device) for key, tensor in inputs.items()}
def inference(inputs, max_new_tokens=128):
    """
    Run model.generate on the prepared inputs.

    Parameters
    ----------
    inputs : dict
        Tokenizer output (e.g. input_ids / attention_mask tensors), already
        on the model's device (see preprocess()).
    max_new_tokens : int
        Cap on the number of newly generated tokens. Defaults to 128, the
        previously hard-coded limit, so existing callers are unaffected.

    Returns
    -------
    torch.Tensor
        Token ids produced by model.generate.
    """
    # Pure inference: skip autograd bookkeeping entirely.
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return output_ids
def postprocess(output_ids):
    """
    Convert generated token ids into the JSON response payload.

    Decodes the first generated sequence; when the Alpaca response marker is
    present, only the text after the final "### Response:" occurrence is kept.

    Returns
    -------
    str
        JSON document of the form ``{"predicted": <text>}``.
    """
    decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    marker = "### Response:"
    answer = decoded.split(marker)[-1] if marker in decoded else decoded
    return json.dumps({"predicted": answer.strip()})