# absa-app / seq2seq_inference.py
# (originally uploaded by asmashayea — "Add application file", commit 6d91ffe)
import json
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel
# Instruction prompt prepended to every review before generation.
# It constrains the model to emit ONLY a JSON list of
# {"aspect": ..., "sentiment": ...} objects, which infer_t5_prompt
# then parses with json.loads. Do not edit casually: the exact wording
# is part of the model's expected input distribution.
SYSTEM_PROMPT = (
"You are an advanced AI model specialized in extracting aspects and determining their sentiment polarity from customer reviews.\n\n"
"Instructions:\n"
"1. Extract only the aspects (nouns) mentioned in the review.\n"
"2. Assign a sentiment to each aspect: \"positive\", \"negative\", or \"neutral\".\n"
"3. Return aspects in the same language as they appear.\n"
"4. An aspect must be a noun that refers to a specific item or service the user described.\n"
"5. Ignore adjectives, general ideas, and vague topics.\n"
"6. Do NOT translate, explain, or add extra text.\n"
"7. The output must be just a valid JSON list with 'aspect' and 'sentiment'. Start with `[` and stop at `]`.\n"
"8. Do NOT output the instructions, review, or any text — only one output JSON list.\n"
"9. Just one output and one review."
)
def infer_t5_prompt(review_text, tokenizer, peft_model):
    """Run aspect-based sentiment extraction on a single review.

    Builds the full prompt (SYSTEM_PROMPT + review), generates with
    deterministic beam search, and parses the model output as JSON.

    Args:
        review_text: The raw customer review string.
        tokenizer: A HuggingFace tokenizer matching ``peft_model``.
        peft_model: A (PEFT-wrapped) seq2seq model with ``.generate()``.

    Returns:
        The parsed JSON value (expected: a list of dicts with "aspect"
        and "sentiment" keys) when the output is valid JSON; otherwise
        the raw decoded string is returned so callers can inspect it.
    """
    prompt = SYSTEM_PROMPT + f"\n\nReview: {review_text}"
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(peft_model.device)

    with torch.no_grad():
        outputs = peft_model.generate(
            **inputs,
            max_new_tokens=256,
            num_beams=4,
            # Deterministic beam search; no `temperature` here — it is
            # ignored when do_sample=False and newer transformers versions
            # warn/error if a sampling parameter is passed anyway.
            do_sample=False,
            early_stopping=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(
        outputs[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    ).strip()
    # Strip sentinel/EOS tokens that occasionally survive decoding.
    decoded = decoded.replace('<extra_id_0>', '').replace('</s>', '').strip()

    try:
        return json.loads(decoded)
    except json.JSONDecodeError:
        # The model sometimes wraps the JSON list in stray text; try the
        # outermost [...] span before giving up.
        start, end = decoded.find('['), decoded.rfind(']')
        if start != -1 and end > start:
            try:
                return json.loads(decoded[start:end + 1])
            except json.JSONDecodeError:
                pass
        # Fall back to the raw string so the caller can log/inspect it.
        return decoded