# User-Modeling-Agent / core /persona.py
# Israelbliz's picture
# Upload persona.py
# 74e7e35 verified
"""Persona engine — turn a user's review history into a behavioral fingerprint.
The persona is the spine of the whole system. Both tasks ask it different
questions:
Task A: "Given this persona and this item, how would the user rate and review it?"
Task B: "Given this persona, what items would the user want next?"
A persona has two layers:
1. Quantitative signals (computed deterministically from history)
- rating cadence: mean, std, distribution shape
- review length: mean, std
- vocabulary fingerprint: top distinctive terms
- domain mix: which categories the user engages with
- verified-purchase rate, helpful-vote signal
2. Qualitative summary (LLM-generated, cached)
- tone descriptor (snarky / earnest / analytical / casual / ...)
- common preferences (themes, styles)
- common complaints (deal-breakers)
- recommended audience for THIS user (one-liner persona pitch)
The qualitative layer is what makes generated reviews feel like the actual
user wrote them. Without it, you get generic LLM prose. With it, you get
behavioral fidelity — which is one of Task A's three scored axes.
"""
from __future__ import annotations
import logging
from collections import Counter
from dataclasses import dataclass, field, asdict
from typing import Any
import pandas as pd
from pydantic import BaseModel, Field
from core.llm import LLMClient
log = logging.getLogger(__name__)
# ──────────────────────────────────────────────────────────────────────────────
# Schemas
# ──────────────────────────────────────────────────────────────────────────────
class QualitativeSummary(BaseModel):
    """Structured-output schema for the LLM-written half of a persona.

    Each field's ``description`` is part of the schema handed to the model,
    so the wording below doubles as the generation instructions.
    """

    # Single-word register of the reviewer's writing.
    tone: str = Field(
        description="One-word tone descriptor: snarky, earnest, analytical, casual, enthusiastic, terse, verbose, etc.",
    )
    # What the reviewer tends to like.
    preferred_themes: list[str] = Field(
        description="3-5 themes/styles/qualities this user gravitates toward",
    )
    # What the reviewer tends to criticise.
    common_complaints: list[str] = Field(
        description="2-4 recurring deal-breakers or critique patterns",
    )
    # One-sentence pitch of the reviewer's voice.
    voice_one_liner: str = Field(
        description="A single sentence describing this user's reviewing voice as if pitching them to a casting director",
    )
@dataclass
class UserPersona:
    """Complete persona: quantitative signals, qualitative summary, history.

    The quantitative fields are required and are computed from the user's
    reviews. The qualitative fields default to empty and are filled in later
    by ``PersonaEngine.enrich``. ``history_samples`` holds a small subset of
    the user's reviews for retrieval / grounding.
    """

    user_id: str

    # Quantitative
    n_reviews: int
    avg_rating: float
    std_rating: float
    avg_review_length: float
    std_review_length: float
    verified_rate: float
    domains: list[str]
    n_domains: int
    rating_distribution: dict[int, float]  # {1: 0.05, 2: 0.1, ..., 5: 0.4}
    top_terms: list[str]  # vocabulary fingerprint

    # Qualitative (lazily filled by PersonaEngine.enrich)
    tone: str = ""
    preferred_themes: list[str] = field(default_factory=list)
    common_complaints: list[str] = field(default_factory=list)
    voice_one_liner: str = ""

    # Sample history for retrieval/grounding (subset of training reviews)
    history_samples: list[dict[str, Any]] = field(default_factory=list)

    def to_prompt_block(self) -> str:
        """Render the persona as a structured prompt section.

        This text is what the LLM sees when generating reviews or
        recommendations, so the layout is kept fixed. Empty qualitative
        fields are rendered as ``unspecified``; only the first 15 vocabulary
        terms are included.

        Returns:
            A multi-line string starting with ``USER PERSONA`` and ending
            with a newline.
        """
        # The star and plus-minus glyphs are written as escapes (U+2605,
        # U+00B1) so they cannot be corrupted if the file is re-encoded.
        dist = " ".join(
            f"{stars}\u2605:{share:.0%}"
            for stars, share in sorted(self.rating_distribution.items())
        )
        return (
            f"USER PERSONA\n"
            f" Reviews written: {self.n_reviews}\n"
            f" Avg rating: {self.avg_rating:.2f} (\u00b1{self.std_rating:.2f})\n"
            f" Rating distribution: {dist}\n"
            f" Avg review length: {self.avg_review_length:.0f} words (\u00b1{self.std_review_length:.0f})\n"
            f" Verified-purchase rate: {self.verified_rate:.0%}\n"
            f" Active domains: {', '.join(self.domains)}\n"
            f" Vocabulary fingerprint: {', '.join(self.top_terms[:15])}\n"
            f" Tone: {self.tone or 'unspecified'}\n"
            f" Preferred themes: {', '.join(self.preferred_themes) or 'unspecified'}\n"
            f" Common complaints: {', '.join(self.common_complaints) or 'unspecified'}\n"
            f" Voice: {self.voice_one_liner or 'unspecified'}\n"
        )

    def as_dict(self) -> dict:
        """Return the persona as a plain nested dict (via ``dataclasses.asdict``)."""
        return asdict(self)
# ──────────────────────────────────────────────────────────────────────────────
# Engine
# ──────────────────────────────────────────────────────────────────────────────
# Generic English stopwords plus review filler words (including contractions
# spelled without their apostrophe). Kept inline so the module does not need
# nltk's corpus download step. Repeated entries collapse in the set.
_STOPWORDS = {*"""
a an the and or but if then else when while of in on at by to for with from
into onto over under is are was were be been being have has had do does did
i you he she it we they me him her us them my your his its our their this
that these those there here what which who whom whose how why so as too very
just also more most some any all each every other another such no not nor only
own same can will would could should might may must one two three really get
got gets just like dont didnt isnt arent wasnt werent havent hadnt hasnt cant
couldnt wouldnt shouldnt wont thats whats theres heres ive ill ive youve im
""".split()}
class PersonaEngine:
    """Build personas from review history.

    Two entry points:
        from_dataframe(user_id, training_reviews_df) -> UserPersona
        enrich(persona) -> UserPersona  # adds qualitative summary via LLM
    """

    def __init__(self, llm: LLMClient | None = None,
                 top_terms_k: int = 20,
                 history_samples_k: int = 8):
        """Configure the engine.

        Args:
            llm: Client used by ``enrich``; a default ``LLMClient()`` is
                created when omitted.
            top_terms_k: Size of the vocabulary fingerprint.
            history_samples_k: Number of most-recent reviews kept as samples.
        """
        self.llm = llm or LLMClient()
        self.top_terms_k = top_terms_k
        self.history_samples_k = history_samples_k
        # Enrichment cache keyed by user_id. enrich() makes one LLM call per
        # user; a user already seen by this engine instance reuses the stored
        # qualitative summary instead of triggering another call. Entries are
        # snapshots (lists copied) so personas never share list objects with
        # the cache or with each other.
        self._enrichment_cache: dict[str, dict] = {}

    # ─────────────────────────── Quantitative ────────────────────────────
    def from_dataframe(self, user_id: str,
                       reviews: pd.DataFrame) -> UserPersona:
        """Build a UserPersona from a DataFrame of training reviews.

        Expected columns: user_id, parent_asin, rating, text, verified_purchase,
        domain, timestamp.

        Args:
            user_id: The user whose rows are selected from ``reviews``.
            reviews: Review rows; rows belonging to other users are ignored.

        Returns:
            A persona with the quantitative fields and history samples set;
            the qualitative fields are left at their defaults.

        Raises:
            ValueError: If ``reviews`` has no rows for ``user_id``.
        """
        user_reviews = reviews[reviews["user_id"] == user_id]
        if user_reviews.empty:
            raise ValueError(f"No reviews found for user_id={user_id!r}")

        ratings = user_reviews["rating"].astype(float)
        # Missing text counts as a zero-word review.
        lengths = user_reviews["text"].fillna("").str.split().str.len()

        # Rating distribution as proportions, keyed by the rounded star value.
        dist = ratings.round().astype(int).value_counts(normalize=True).to_dict()
        rating_dist = {int(k): float(v) for k, v in dist.items()}

        # Vocabulary fingerprint: most common non-stopword tokens.
        top_terms = self._top_terms(user_reviews["text"].tolist())

        # Sample history items for retrieval grounding: keep the most recent.
        history = (
            user_reviews.sort_values("timestamp", ascending=False)
            .head(self.history_samples_k)
        )
        history_samples = [self._history_sample(row) for _, row in history.iterrows()]

        return UserPersona(
            user_id=user_id,
            n_reviews=len(user_reviews),
            avg_rating=float(ratings.mean()),
            # std() of a single observation is NaN; report 0.0 instead.
            std_rating=float(ratings.std()) if len(ratings) > 1 else 0.0,
            avg_review_length=float(lengths.mean()),
            std_review_length=float(lengths.std()) if len(lengths) > 1 else 0.0,
            verified_rate=float(user_reviews["verified_purchase"].mean()),
            domains=sorted(user_reviews["domain"].unique().tolist()),
            n_domains=int(user_reviews["domain"].nunique()),
            rating_distribution=rating_dist,
            top_terms=top_terms,
            history_samples=history_samples,
        )

    @staticmethod
    def _history_sample(row: Any) -> dict[str, Any]:
        """Convert one review row (any mapping-like object) into a history sample.

        The text is capped at 500 characters. Non-string text (NaN / None from
        a missing review body) becomes an empty string instead of raising on
        the slice, matching how the length and vocabulary paths treat it.
        """
        text = row["text"]
        return {
            "parent_asin": row["parent_asin"],
            "rating": float(row["rating"]),
            "text": text[:500] if isinstance(text, str) else "",
            "domain": row["domain"],
        }

    def _top_terms(self, texts: list[str]) -> list[str]:
        """Return the ``top_terms_k`` most frequent content tokens.

        Tokens are whitespace-split, lower-cased and stripped of surrounding
        punctuation; stopwords, tokens of length <= 2 and tokens containing
        non-letters are dropped. Non-string entries in ``texts`` are skipped.
        """
        counter: Counter = Counter()
        for txt in texts:
            if not isinstance(txt, str):
                continue
            tokens = (t.lower().strip(".,!?\"'()[]{}:;") for t in txt.split())
            counter.update(
                t for t in tokens
                if len(t) > 2 and t not in _STOPWORDS and t.isalpha()
            )
        return [w for w, _ in counter.most_common(self.top_terms_k)]

    # ─────────────────────────── Qualitative ─────────────────────────────
    def enrich(self, persona: UserPersona) -> UserPersona:
        """Add an LLM-generated qualitative summary to an existing persona.

        The summary is requested with ``model="reasoning"``. If the persona
        has no history samples, or the LLM call fails, a deterministic summary
        derived from the quantitative signals is used instead so tone and
        voice are not left empty. Successful summaries are cached per user_id
        for the lifetime of this engine.

        Args:
            persona: Persona to enrich; it is mutated in place.

        Returns:
            The same ``persona`` object with qualitative fields filled.
        """
        if not persona.history_samples:
            log.warning("User %s has no history samples; skipping enrichment", persona.user_id)
            return self._apply_deterministic_fallback(persona)

        # Cache hit: reuse the qualitative summary computed earlier for this
        # user. Lists are copied so this persona owns its own objects.
        cached = self._enrichment_cache.get(persona.user_id)
        if cached is not None:
            log.info("Persona enrichment cache hit for %s", persona.user_id)
            persona.tone = cached["tone"] or persona.tone
            persona.preferred_themes = list(cached["preferred_themes"]) or persona.preferred_themes
            persona.common_complaints = list(cached["common_complaints"]) or persona.common_complaints
            persona.voice_one_liner = cached["voice_one_liner"] or persona.voice_one_liner
            return persona

        # U+2605 (black star) is written as an escape so it survives re-encoding.
        sample_block = "\n\n".join(
            f"[{i+1}] Rating: {s['rating']}\u2605 Domain: {s['domain']}\n{s['text'][:400]}"
            for i, s in enumerate(persona.history_samples)
        )
        prompt = (
            f"Below are review samples from a single user. Read them carefully "
            f"and infer their reviewing voice.\n\n"
            f"{sample_block}\n\n"
            f"Quantitative signals about this user:\n"
            f"- Average rating: {persona.avg_rating:.2f} of 5\n"
            f"- Average review length: {persona.avg_review_length:.0f} words\n"
            f"- Vocabulary they use often: {', '.join(persona.top_terms[:15])}\n\n"
            f"Produce a qualitative summary of their reviewer voice. "
            f"Be concise and concrete. If the samples are too sparse or generic, "
            f"infer the most plausible voice rather than refusing."
        )
        try:
            summary = self.llm.structured(
                prompt, QualitativeSummary, model="reasoning",
                system="You are a behavioral analyst specializing in online review patterns. Always produce valid output.",
            )
            persona.tone = summary.tone or persona.tone
            persona.preferred_themes = summary.preferred_themes or persona.preferred_themes
            persona.common_complaints = summary.common_complaints or persona.common_complaints
            persona.voice_one_liner = summary.voice_one_liner or persona.voice_one_liner
            # Cache the successful summary for reuse by this engine.
            self._enrichment_cache[persona.user_id] = self._qualitative_snapshot(persona)
        except Exception as e:
            # Deliberate best-effort boundary: any failure in the LLM call or
            # in reading its result degrades to the deterministic summary.
            log.warning(
                "LLM enrichment failed for %s (%s); using deterministic fallback",
                persona.user_id, type(e).__name__,
            )
            persona = self._apply_deterministic_fallback(persona)
        return persona

    @staticmethod
    def _qualitative_snapshot(persona: UserPersona) -> dict[str, Any]:
        """Return the persona's qualitative fields as a dict with copied lists."""
        return {
            "tone": persona.tone,
            "preferred_themes": list(persona.preferred_themes),
            "common_complaints": list(persona.common_complaints),
            "voice_one_liner": persona.voice_one_liner,
        }

    @staticmethod
    def _apply_deterministic_fallback(persona: UserPersona) -> UserPersona:
        """Fill in tone/themes/voice from quantitative signals when LLM fails.

        This isn't as rich as an LLM summary, but it guarantees downstream
        query construction has something to work with. Only fields that are
        currently empty are overwritten; ``common_complaints`` is not touched.

        Returns:
            The same ``persona`` object, mutated in place.
        """
        # Tone bucket from avg rating
        if persona.avg_rating >= 4.5:
            tone = "enthusiastic"
        elif persona.avg_rating >= 3.8:
            tone = "earnest"
        elif persona.avg_rating >= 3.0:
            tone = "measured"
        else:
            tone = "critical"

        # Use top distinctive terms as proxy themes (filter out true generics)
        generic_terms = {"book", "read", "story", "movie", "film", "great", "good",
                         "really", "much", "first", "next", "through", "about"}
        candidate_themes = [t for t in persona.top_terms if t not in generic_terms][:5]
        themes = candidate_themes or persona.top_terms[:3]

        # Domain-grounded voice
        domain_str = "/".join(persona.domains) if persona.domains else "general"
        if persona.avg_review_length < 30:
            length_descriptor = "writes brief reviews"
        elif persona.avg_review_length > 150:
            length_descriptor = "writes detailed reviews"
        else:
            length_descriptor = "writes moderate-length reviews"
        # Pick the article by the tone's first letter ("An earnest", "A critical").
        article = "An" if tone[0] in "aeiou" else "A"
        voice = (
            f"{article} {tone} {domain_str} reviewer who {length_descriptor} "
            f"(avg {persona.avg_rating:.1f}\u2605 over {persona.n_reviews} reviews)."
        )

        if not persona.tone:
            persona.tone = tone
        if not persona.preferred_themes:
            persona.preferred_themes = themes
        if not persona.voice_one_liner:
            persona.voice_one_liner = voice
        return persona