import json

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel

# Instruction prompt prepended to every review before generation.
# NOTE: the generation code below relies on rule 7 (output is a bare JSON
# list delimited by `[` and `]`) to trim stray text from the model output.
SYSTEM_PROMPT = (
    "You are an advanced AI model specialized in extracting aspects and determining their sentiment polarity from customer reviews.\n\n"
    "Instructions:\n"
    "1. Extract only the aspects (nouns) mentioned in the review.\n"
    "2. Assign a sentiment to each aspect: \"positive\", \"negative\", or \"neutral\".\n"
    "3. Return aspects in the same language as they appear.\n"
    "4. An aspect must be a noun that refers to a specific item or service the user described.\n"
    "5. Ignore adjectives, general ideas, and vague topics.\n"
    "6. Do NOT translate, explain, or add extra text.\n"
    "7. The output must be just a valid JSON list with 'aspect' and 'sentiment'. Start with `[` and stop at `]`.\n"
    "8. Do NOT output the instructions, review, or any text — only one output JSON list.\n"
    "9. Just one output and one review."
)


def infer_t5_prompt(review_text, tokenizer, peft_model):
    """Run aspect-based sentiment extraction on a single review.

    Builds a prompt from ``SYSTEM_PROMPT`` plus the review, generates
    deterministically with beam search, and attempts to parse the decoded
    output as JSON.

    Args:
        review_text: Raw customer review text.
        tokenizer: Hugging Face tokenizer matching ``peft_model``.
        peft_model: A (PEFT-wrapped) seq2seq model exposing ``.generate()``
            and ``.device``.

    Returns:
        The parsed JSON value (expected: a list of ``{"aspect", "sentiment"}``
        dicts) on success, or the raw decoded string if parsing fails so the
        caller can inspect or repair it.
    """
    prompt = SYSTEM_PROMPT + f"\n\nReview: {review_text}"
    inputs = tokenizer(
        prompt, return_tensors="pt", padding=True, truncation=True
    ).to(peft_model.device)

    with torch.no_grad():
        outputs = peft_model.generate(
            **inputs,
            max_new_tokens=256,
            num_beams=4,
            # Deterministic decoding. FIX: the original also passed
            # temperature=0.0, which is ignored (and warned about by
            # transformers) when do_sample=False — removed.
            do_sample=False,
            early_stopping=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(
        outputs[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    ).strip()

    # FIX: the original chained `.replace('', '')` calls were no-ops
    # (replacing the empty string does nothing). Instead, trim any stray
    # text around the JSON list the prompt instructs the model to emit.
    start = decoded.find("[")
    end = decoded.rfind("]")
    if start != -1 and end > start:
        decoded = decoded[start : end + 1]

    try:
        return json.loads(decoded)
    except json.JSONDecodeError:
        # Best effort: hand the unparseable raw string back to the caller.
        return decoded