from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
import numpy as np
import pandas as pd
import re
import os
# ------------------ Review Generation ------------------

def generate_review(base_model, product, category, features, rating, tone, review_cache=None):
    """
    Generate a product review with the LoRA fine-tuned model, using sampling
    parameters tuned for repetition control. If a review_cache list is passed,
    evaluation metrics are printed automatically after every 10 reviews.
    """
    adapter_path = "lora_adapter"
    # adapter_path = os.path.join(os.getcwd(), "lora_adapter")

    # Note: the base model and adapter are reloaded on every call; cache them
    # at module level if this function is called in a loop.
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    model = AutoModelForCausalLM.from_pretrained(base_model)
    model = PeftModel.from_pretrained(model, adapter_path)
    model.eval()

    prompt = (
        f"Product: {product}\n"
        f"Category: {category}\n"
        f"Features: {features}\n"
        f"Rating: {rating}\n"
        f"Tone: {tone}\n\nReview:"
    )
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=180,
            temperature=0.8,
            top_p=0.9,
            repetition_penalty=1.8,   # down-weight tokens already generated
            no_repeat_ngram_size=3,   # forbid repeating any 3-gram verbatim
            do_sample=True,
        )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # -------- Optional: Evaluation Trigger --------
    if review_cache is not None:
        review_cache.append(generated_text)
        if len(review_cache) % 10 == 0:
            metrics = compute_metrics(review_cache, requested_tone=tone)
            metrics["distinct_n"] = distinct_n_score(review_cache)
            print(f"\n📊 Auto Evaluation after {len(review_cache)} reviews:")
            print(metrics)

    return generated_text
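
# A minimal usage sketch; "gpt2" and the product details below are assumed
# placeholders, not values taken from this project:
#   cache = []
#   text = generate_review("gpt2", "Trail Running Shoes", "Sportswear",
#                          "breathable mesh, grippy sole", 4, "positive",
#                          review_cache=cache)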

# ------------------ Evaluation Metrics ------------------

def compute_metrics(reviews, requested_tone="neutral"):
    """
    Compute simple text-level metrics:
    - avg_length: average word count per review
    - tone_match_ratio: fraction of reviews whose text mentions the requested tone
    """
    avg_length = np.mean([len(r.split()) for r in reviews]) if reviews else 0
    # re.escape guards against regex metacharacters in the tone string
    tone_match = sum(1 for r in reviews if re.search(re.escape(requested_tone), r, re.IGNORECASE))
    tone_match_ratio = tone_match / len(reviews) if reviews else 0.0
    return {
        "avg_length": round(avg_length, 2),
        "tone_match_ratio": round(tone_match_ratio, 3),
    }
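
# Worked example (hand-checked) on two hypothetical reviews:
#   compute_metrics(
#       ["This product is great, very positive experience.", "Bad battery."],
#       requested_tone="positive",
#   )
#   -> {"avg_length": 4.5, "tone_match_ratio": 0.5}
# (7 and 2 whitespace-split words average to 4.5; 1 of 2 reviews mentions "positive".)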

# ------------------ Diversity Metric ------------------

def distinct_n_score(texts, n=2):
    """
    Compute the Distinct-N score: the ratio of unique n-grams to total
    n-grams across all texts. Values closer to 1.0 mean less repetition.
    """
    all_ngrams = []
    for text in texts:
        tokens = text.split()
        all_ngrams.extend(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))
    if not all_ngrams:
        return 0.0
    unique_ngrams = len(set(all_ngrams))
    return round(unique_ngrams / len(all_ngrams), 3)
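
# Worked example (hand-checked) for n=2: "good good good good" yields three
# copies of the bigram ("good", "good"); "very good indeed" adds two more
# distinct bigrams, so 3 unique bigrams out of 5 total -> 0.6.
#   distinct_n_score(["good good good good", "very good indeed"])  # -> 0.6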

# ------------------ Perplexity Evaluation ------------------

def evaluate_perplexity(base_model, test_csv="dataset/amazon_product_reviews.csv"):
    """
    Compute perplexity on a small random sample of held-out reviews.
    Lower perplexity indicates the model fits the data better.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    model = AutoModelForCausalLM.from_pretrained(base_model)
    model = PeftModel.from_pretrained(model, "lora_adapter")
    model.eval()

    df = pd.read_csv(test_csv)
    texts = df["Review"].dropna().sample(min(50, len(df))).tolist()

    total_loss, total_tokens = 0, 0
    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
        with torch.no_grad():
            outputs = model(**inputs, labels=inputs["input_ids"])
        # outputs.loss is the mean cross-entropy per predicted token;
        # weight by sequence length so the average spans the whole sample.
        loss = outputs.loss.item()
        total_loss += loss * inputs["input_ids"].size(1)
        total_tokens += inputs["input_ids"].size(1)

    ppl = np.exp(total_loss / total_tokens) if total_tokens > 0 else float("inf")
    return round(ppl, 2)
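
# A minimal runnable entry point, assuming the "gpt2" base checkpoint and the
# default CSV path; both are placeholders for whatever this project trained on.
if __name__ == "__main__":
    print("Perplexity on held-out sample:", evaluate_perplexity("gpt2"))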