File size: 4,024 Bytes
caeb4d7
 
 
 
 
 
1e539ba
caeb4d7
 
 
 
 
 
 
 
d42abe7
 
caeb4d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
import numpy as np
import pandas as pd
import re
import os
from collections import Counter

# ------------------ Review Generation ------------------
# Loaded (tokenizer, model) pairs keyed by (base_model, adapter_path), so that
# repeated generate_review() calls do not reload the weights from disk.
_REVIEW_MODEL_CACHE = {}


def _load_review_model(base_model, adapter_path):
    """Return the (tokenizer, model) pair for the base model + LoRA adapter, loading it on first use."""
    key = (base_model, adapter_path)
    if key not in _REVIEW_MODEL_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(base_model)
        model = AutoModelForCausalLM.from_pretrained(base_model)
        model = PeftModel.from_pretrained(model, adapter_path)
        model.eval()
        _REVIEW_MODEL_CACHE[key] = (tokenizer, model)
    return _REVIEW_MODEL_CACHE[key]


def generate_review(base_model, product, category, features, rating, tone, review_cache=None):
    """
    Generate a product review with the LoRA fine-tuned model.

    The tokenizer, base model and adapter (read from the relative path
    "lora_adapter") are loaded on the first call for a given ``base_model``
    and reused afterwards. Sampling uses a repetition penalty and a
    no-repeat 3-gram constraint to limit repeated phrases.

    Args:
        base_model: Model identifier or path passed to ``from_pretrained``.
        product, category, features, rating, tone: Values interpolated
            verbatim into the generation prompt.
        review_cache: Optional list. When given, the generated text is
            appended to it, and every time its length reaches a multiple
            of 10 the evaluation metrics for the whole list are printed.

    Returns:
        The decoded text of the first generated sequence.
    """
    adapter_path = "lora_adapter"
    tokenizer, model = _load_review_model(base_model, adapter_path)

    prompt = (
        f"Product: {product}\n"
        f"Category: {category}\n"
        f"Features: {features}\n"
        f"Rating: {rating}\n"
        f"Tone: {tone}\n\nReview:"
    )

    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=180,
            temperature=0.8,
            top_p=0.9,
            repetition_penalty=1.8,
            no_repeat_ngram_size=3,
            do_sample=True
        )

    # NOTE(review): for a causal LM the generated sequence presumably starts
    # with the prompt tokens, so this text includes the "Tone: ..." line and
    # the tone-match metric below would then match every cached review.
    # Confirm whether callers expect the prompt to be part of the result.
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # -------- Optional: Evaluation Trigger --------
    if review_cache is not None:
        review_cache.append(generated_text)
        if len(review_cache) % 10 == 0:
            metrics = compute_metrics(review_cache, requested_tone=tone)
            metrics["distinct_n"] = distinct_n_score(review_cache)
            print(f"\n📊 Auto Evaluation after {len(review_cache)} reviews:")
            print(metrics)

    return generated_text


# ------------------ Evaluation Metrics ------------------
def compute_metrics(reviews, requested_tone="neutral"):
    """
    Compute simple text-level metrics over a collection of reviews.

    Args:
        reviews: Sequence of review strings.
        requested_tone: Tone word to look for. It is matched as a literal,
            case-insensitive substring, so regex metacharacters in it
            (e.g. "c++", "a.c") are not interpreted as a pattern.

    Returns:
        A dict with:
        - "avg_length": average whitespace-separated word count, rounded
          to 2 decimals (0 when ``reviews`` is empty)
        - "tone_match_ratio": fraction of reviews containing the tone,
          rounded to 3 decimals (0.0 when ``reviews`` is empty)
    """
    if not reviews:
        return {"avg_length": 0, "tone_match_ratio": 0.0}

    # Escape the tone so user-supplied text cannot raise re.error or
    # over-match through regex metacharacters.
    tone_pattern = re.compile(re.escape(requested_tone), re.IGNORECASE)

    review_count = len(reviews)
    avg_length = sum(len(r.split()) for r in reviews) / review_count
    tone_match = sum(1 for r in reviews if tone_pattern.search(r))
    tone_match_ratio = tone_match / review_count
    return {
        "avg_length": round(avg_length, 2),
        "tone_match_ratio": round(tone_match_ratio, 3)
    }


# ------------------ Diversity Metric ------------------
def distinct_n_score(texts, n=2):
    """
    Return the Distinct-N score of ``texts``: unique n-grams / total n-grams.

    Each text is split on whitespace and its n-grams are pooled across all
    texts. The ratio is rounded to 3 decimals; values closer to 1 indicate
    less repetition. Returns 0.0 when no n-gram can be formed.
    """
    seen = set()
    total = 0
    for text in texts:
        words = text.split()
        for start in range(len(words) - n + 1):
            seen.add(tuple(words[start:start + n]))
            total += 1

    if total == 0:
        return 0.0
    return round(len(seen) / total, 3)


# ------------------ Perplexity Evaluation ------------------
def evaluate_perplexity(base_model, test_csv="dataset/amazon_product_reviews.csv",
                        sample_size=50, adapter_path="./lora_adapter"):
    """
    Compute perplexity of the LoRA-adapted model on a random subset of reviews.

    Lower perplexity = better model.

    Args:
        base_model: Model identifier or path passed to ``from_pretrained``.
        test_csv: CSV file with a "Review" column.
        sample_size: Maximum number of non-null reviews to sample.
        adapter_path: Location of the LoRA adapter to apply.

    Returns:
        The perplexity rounded to 2 decimals, or ``inf`` when no review
        yields at least one prediction target.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    model = AutoModelForCausalLM.from_pretrained(base_model)
    model = PeftModel.from_pretrained(model, adapter_path)
    model.eval()

    df = pd.read_csv(test_csv)
    # Size the sample from the rows left after dropna(); using len(df) would
    # make sample() raise when NaN rows leave fewer reviews than requested.
    reviews = df["Review"].dropna()
    texts = reviews.sample(min(sample_size, len(reviews))).tolist()

    total_loss, total_tokens = 0.0, 0
    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
        # With labels == input_ids the model shifts the labels internally, so
        # the returned loss is a mean over (sequence length - 1) targets.
        num_targets = inputs["input_ids"].size(1) - 1
        if num_targets < 1:
            # Nothing to predict for empty or single-token inputs.
            continue
        with torch.no_grad():
            outputs = model(**inputs, labels=inputs["input_ids"])
        total_loss += outputs.loss.item() * num_targets
        total_tokens += num_targets

    ppl = float(np.exp(total_loss / total_tokens)) if total_tokens > 0 else float("inf")
    return round(ppl, 2)