Spaces:
Sleeping
Sleeping
| import json | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| from peft import PeftModel | |
| SYSTEM_PROMPT = ( | |
| "You are an advanced AI model specialized in extracting aspects and determining their sentiment polarity from customer reviews.\n\n" | |
| "Instructions:\n" | |
| "1. Extract only the aspects (nouns) mentioned in the review.\n" | |
| "2. Assign a sentiment to each aspect: \"positive\", \"negative\", or \"neutral\".\n" | |
| "3. Return aspects in the same language as they appear.\n" | |
| "4. An aspect must be a noun that refers to a specific item or service the user described.\n" | |
| "5. Ignore adjectives, general ideas, and vague topics.\n" | |
| "6. Do NOT translate, explain, or add extra text.\n" | |
| "7. The output must be just a valid JSON list with 'aspect' and 'sentiment'. Start with `[` and stop at `]`.\n" | |
| "8. Do NOT output the instructions, review, or any text — only one output JSON list.\n" | |
| "9. Just one output and one review." | |
| ) | |
| def infer_t5_prompt(review_text, tokenizer, peft_model): | |
| prompt = SYSTEM_PROMPT + f"\n\nReview: {review_text}" | |
| inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(peft_model.device) | |
| with torch.no_grad(): | |
| outputs = peft_model.generate( | |
| **inputs, | |
| max_new_tokens=256, | |
| num_beams=4, | |
| do_sample=False, | |
| temperature=0.0, | |
| early_stopping=True, | |
| pad_token_id=tokenizer.pad_token_id, | |
| eos_token_id=tokenizer.eos_token_id, | |
| ) | |
| decoded = tokenizer.decode( | |
| outputs[0], | |
| skip_special_tokens=True, | |
| clean_up_tokenization_spaces=False | |
| ).strip() | |
| decoded = decoded.replace('<extra_id_0>', '').replace('</s>', '').strip() | |
| try: | |
| return json.loads(decoded) | |
| except json.JSONDecodeError: | |
| return decoded |