Spaces:

Israelbliz
/

User-Modeling-Agent

Running

App Files Files Community

User-Modeling-Agent / core /persona.py

Israelbliz

Upload persona.py

74e7e35 verified about 18 hours ago

raw

history blame contribute delete

15.1 kB

	"""Persona engine — turn a user's review history into a behavioral fingerprint.

	The persona is the spine of the whole system. Both tasks ask it different
	questions:

	Task A: "Given this persona and this item, how would the user rate and review it?"
	Task B: "Given this persona, what items would the user want next?"

	A persona has two layers:

	1. Quantitative signals (computed deterministically from history)
	- rating cadence: mean, std, distribution shape
	- review length: mean, std
	- vocabulary fingerprint: top distinctive terms
	- domain mix: which categories the user engages with
	- verified-purchase rate, helpful-vote signal

	2. Qualitative summary (LLM-generated, cached)
	- tone descriptor (snarky / earnest / analytical / casual / ...)
	- common preferences (themes, styles)
	- common complaints (deal-breakers)
	- recommended audience for THIS user (one-liner persona pitch)

	The qualitative layer is what makes generated reviews feel like the actual
	user wrote them. Without it, you get generic LLM prose. With it, you get
	behavioral fidelity — which is one of Task A's three scored axes.
	"""
	from __future__ import annotations

	import logging
	from collections import Counter
	from dataclasses import dataclass, field, asdict
	from typing import Any

	import pandas as pd
	from pydantic import BaseModel, Field

	from core.llm import LLMClient

	# Module-level logger, named after this module via __name__.
	log = logging.getLogger(__name__)


	# ──────────────────────────────────────────────────────────────────────────────
	# Schemas
	# ──────────────────────────────────────────────────────────────────────────────

	class QualitativeSummary(BaseModel):
	    """Structured output schema for the LLM-written half of a persona.

	    Each field's ``description`` is part of the schema handed to the model,
	    so the wording below is reproduced verbatim.
	    """

	    # Single-word label for how the user sounds.
	    tone: str = Field(
	        description=(
	            "One-word tone descriptor: snarky, earnest, analytical, casual, "
	            "enthusiastic, terse, verbose, etc."
	        ),
	    )
	    # Things the user tends to like.
	    preferred_themes: list[str] = Field(
	        description="3-5 themes/styles/qualities this user gravitates toward",
	    )
	    # Things the user tends to criticise.
	    common_complaints: list[str] = Field(
	        description="2-4 recurring deal-breakers or critique patterns",
	    )
	    # One-sentence pitch of the user's reviewing voice.
	    voice_one_liner: str = Field(
	        description=(
	            "A single sentence describing this user's reviewing voice "
	            "as if pitching them to a casting director"
	        ),
	    )


	@dataclass
	class UserPersona:
	    """Complete persona — quantitative signals + qualitative summary + history.

	    The quantitative fields are required at construction time. The
	    qualitative fields default to empty and are meant to be filled in later
	    (see ``PersonaEngine.enrich``).
	    """

	    user_id: str

	    # Quantitative
	    n_reviews: int
	    avg_rating: float
	    std_rating: float
	    avg_review_length: float
	    std_review_length: float
	    verified_rate: float
	    domains: list[str]
	    n_domains: int
	    rating_distribution: dict[int, float]  # {1: 0.05, 2: 0.1, ..., 5: 0.4}
	    top_terms: list[str]  # vocabulary fingerprint

	    # Qualitative (lazily filled by PersonaEngine.enrich)
	    tone: str = ""
	    preferred_themes: list[str] = field(default_factory=list)
	    common_complaints: list[str] = field(default_factory=list)
	    voice_one_liner: str = ""

	    # Sample history for retrieval/grounding (subset of training reviews)
	    history_samples: list[dict[str, Any]] = field(default_factory=list)

	    def to_prompt_block(self) -> str:
	        """Render the persona as a structured prompt section.

	        This text is what the LLM sees when generating reviews /
	        recommendations. Every line always carries a value: any list- or
	        text-valued signal that is empty is rendered as ``unspecified`` so
	        the prompt never contains a dangling ``Label:`` with nothing after it.

	        Returns:
	            A multi-line string starting with ``USER PERSONA`` and ending
	            with a trailing newline.
	        """
	        # Ratings are listed in ascending star order, e.g. "4★:50% 5★:50%".
	        dist = " ".join(
	            f"{r}★:{p:.0%}" for r, p in sorted(self.rating_distribution.items())
	        )
	        # Only the first 15 vocabulary terms are shown to keep the prompt short.
	        vocabulary = ", ".join(self.top_terms[:15])
	        return (
	            f"USER PERSONA\n"
	            f" Reviews written: {self.n_reviews}\n"
	            f" Avg rating: {self.avg_rating:.2f} (±{self.std_rating:.2f})\n"
	            f" Rating distribution: {dist or 'unspecified'}\n"
	            f" Avg review length: {self.avg_review_length:.0f} words (±{self.std_review_length:.0f})\n"
	            f" Verified-purchase rate: {self.verified_rate:.0%}\n"
	            f" Active domains: {', '.join(self.domains) or 'unspecified'}\n"
	            f" Vocabulary fingerprint: {vocabulary or 'unspecified'}\n"
	            f" Tone: {self.tone or 'unspecified'}\n"
	            f" Preferred themes: {', '.join(self.preferred_themes) or 'unspecified'}\n"
	            f" Common complaints: {', '.join(self.common_complaints) or 'unspecified'}\n"
	            f" Voice: {self.voice_one_liner or 'unspecified'}\n"
	        )

	    def as_dict(self) -> dict:
	        """Return the persona as a plain ``dict`` (via ``dataclasses.asdict``)."""
	        return asdict(self)


	# ──────────────────────────────────────────────────────────────────────────────
	# Engine
	# ──────────────────────────────────────────────────────────────────────────────

	# A small set of generic English stopwords + Amazon-review noise. Keeping
	# this in-module avoids pulling in nltk's download flow.
	_STOPWORDS = set("""
	a an the and or but if then else when while of in on at by to for with from
	into onto over under is are was were be been being have has had do does did
	i you he she it we they me him her us them my your his its our their this
	that these those there here what which who whom whose how why so as too very
	just also more most some any all each every other another such no not nor only
	own same can will would could should might may must one two three really get
	got gets just like dont didnt isnt arent wasnt werent havent hadnt hasnt cant
	couldnt wouldnt shouldnt wont thats whats theres heres ive ill ive youve im
	""".split())


	class PersonaEngine:
	    """Build personas from review history.

	    Two entry points:
	        from_dataframe(user_id, training_reviews_df) -> UserPersona
	        enrich(persona) -> UserPersona  # adds qualitative summary via LLM
	    """

	    def __init__(self, llm: LLMClient | None = None,
	                 top_terms_k: int = 20,
	                 history_samples_k: int = 8):
	        """Create an engine.

	        Args:
	            llm: Client used by ``enrich``; a default ``LLMClient()`` is
	                created when ``None`` is passed.
	            top_terms_k: Size of the vocabulary fingerprint.
	            history_samples_k: Number of most-recent reviews kept as samples.
	        """
	        # Explicit None check so a caller-supplied client is never replaced
	        # just because it happens to be falsy.
	        self.llm = llm if llm is not None else LLMClient()
	        self.top_terms_k = top_terms_k
	        self.history_samples_k = history_samples_k
	        # Enrichment cache — keyed by user_id. enrich() makes an LLM call per
	        # user; for a user already seen this session, the cached qualitative
	        # summary is reused instead of calling the LLM again.
	        self._enrichment_cache: dict[str, dict] = {}

	    # ─────────────────────────── Quantitative ────────────────────────────
	    def from_dataframe(self, user_id: str,
	                       reviews: pd.DataFrame) -> UserPersona:
	        """Build a UserPersona from a DataFrame of one user's training reviews.

	        Expected columns: user_id, parent_asin, rating, text, verified_purchase,
	        domain, timestamp.

	        Args:
	            user_id: The user whose rows are selected from ``reviews``.
	            reviews: Review rows; rows for other users are ignored.

	        Returns:
	            A persona with the quantitative layer and history samples filled
	            in; the qualitative fields are left at their defaults.

	        Raises:
	            ValueError: If ``reviews`` has no rows for ``user_id``.
	        """
	        user_reviews = reviews[reviews["user_id"] == user_id]
	        if user_reviews.empty:
	            raise ValueError(f"No reviews found for user_id={user_id!r}")

	        ratings = user_reviews["rating"].astype(float)
	        # Review length in whitespace-separated words; missing text counts as 0.
	        lengths = user_reviews["text"].fillna("").str.split().str.len()

	        # Rating distribution as proportions, keyed by the rounded star value
	        dist = ratings.round().astype(int).value_counts(normalize=True).to_dict()
	        rating_dist = {int(k): float(v) for k, v in dist.items()}

	        # Vocabulary fingerprint: most common non-stopword tokens
	        top_terms = self._top_terms(user_reviews["text"].tolist())

	        # Sample history items for retrieval grounding — keep the most recent
	        recent = (
	            user_reviews.sort_values("timestamp", ascending=False)
	            .head(self.history_samples_k)
	        )
	        history_samples = [
	            {
	                "parent_asin": row["parent_asin"],
	                "rating": float(row["rating"]),
	                # Missing text arrives as NaN (a float); slicing it would
	                # raise, so non-string text becomes an empty sample body.
	                "text": row["text"][:500] if isinstance(row["text"], str) else "",
	                "domain": row["domain"],
	            }
	            for _, row in recent.iterrows()
	        ]

	        return UserPersona(
	            user_id=user_id,
	            n_reviews=len(user_reviews),
	            avg_rating=float(ratings.mean()),
	            # Sample std is undefined for a single review; report 0 instead.
	            std_rating=float(ratings.std()) if len(ratings) > 1 else 0.0,
	            avg_review_length=float(lengths.mean()),
	            std_review_length=float(lengths.std()) if len(lengths) > 1 else 0.0,
	            verified_rate=float(user_reviews["verified_purchase"].mean()),
	            domains=sorted(user_reviews["domain"].unique().tolist()),
	            n_domains=int(user_reviews["domain"].nunique()),
	            rating_distribution=rating_dist,
	            top_terms=top_terms,
	            history_samples=history_samples,
	        )

	    def _top_terms(self, texts: list[str]) -> list[str]:
	        """Return the ``top_terms_k`` most frequent content tokens.

	        Tokens are lower-cased, stripped of surrounding punctuation, and kept
	        only if they are purely alphabetic, longer than two characters, and
	        not in ``_STOPWORDS``. Non-string entries in ``texts`` are skipped.
	        """
	        counter: Counter = Counter()
	        for txt in texts:
	            if not isinstance(txt, str):
	                continue
	            tokens = [t.lower().strip(".,!?\"'()[]{}:;") for t in txt.split()]
	            counter.update(
	                t for t in tokens
	                if t and len(t) > 2 and t not in _STOPWORDS and t.isalpha()
	            )
	        return [w for w, _ in counter.most_common(self.top_terms_k)]

	    # ─────────────────────────── Qualitative ─────────────────────────────
	    def enrich(self, persona: UserPersona) -> UserPersona:
	        """Add LLM-generated qualitative summary to an existing persona.

	        Requests a structured ``QualitativeSummary`` from the LLM client
	        using the "reasoning" model. Whatever happens — no samples, a cache
	        hit, a failed call, or a summary with blank fields — the
	        deterministic fallback is applied last, and it only fills fields that
	        are still empty, so Tone/Themes/Voice are never left blank.

	        Args:
	            persona: Persona to enrich; it is modified in place.

	        Returns:
	            The same ``persona`` object.
	        """
	        if not persona.history_samples:
	            log.warning("User %s has no history samples; skipping enrichment",
	                        persona.user_id)
	            return self._apply_deterministic_fallback(persona)

	        # Cache hit — reuse the qualitative summary computed earlier this
	        # session for this user, saving one LLM call.
	        cached = self._enrichment_cache.get(persona.user_id)
	        if cached is not None:
	            log.info("Persona enrichment cache hit for %s", persona.user_id)
	            persona.tone = cached["tone"] or persona.tone
	            # Copy the lists so this persona never shares list objects with
	            # the cache (or with other personas served from it).
	            persona.preferred_themes = (
	                list(cached["preferred_themes"]) or persona.preferred_themes
	            )
	            persona.common_complaints = (
	                list(cached["common_complaints"]) or persona.common_complaints
	            )
	            persona.voice_one_liner = cached["voice_one_liner"] or persona.voice_one_liner
	            return self._apply_deterministic_fallback(persona)

	        sample_block = "\n\n".join(
	            f"[{i+1}] Rating: {s['rating']}★ Domain: {s['domain']}\n{s['text'][:400]}"
	            for i, s in enumerate(persona.history_samples)
	        )

	        prompt = (
	            f"Below are review samples from a single user. Read them carefully "
	            f"and infer their reviewing voice.\n\n"
	            f"{sample_block}\n\n"
	            f"Quantitative signals about this user:\n"
	            f"- Average rating: {persona.avg_rating:.2f} of 5\n"
	            f"- Average review length: {persona.avg_review_length:.0f} words\n"
	            f"- Vocabulary they use often: {', '.join(persona.top_terms[:15])}\n\n"
	            f"Produce a qualitative summary of their reviewer voice. "
	            f"Be concise and concrete. If the samples are too sparse or generic, "
	            f"infer the most plausible voice rather than refusing."
	        )

	        try:
	            summary = self.llm.structured(
	                prompt, QualitativeSummary, model="reasoning",
	                system="You are a behavioral analyst specializing in online review patterns. Always produce valid output.",
	            )
	            persona.tone = summary.tone or persona.tone
	            persona.preferred_themes = list(
	                summary.preferred_themes or persona.preferred_themes
	            )
	            persona.common_complaints = list(
	                summary.common_complaints or persona.common_complaints
	            )
	            persona.voice_one_liner = summary.voice_one_liner or persona.voice_one_liner
	            # Cache the successful summary for reuse this session. Lists are
	            # copied so later edits to the persona cannot alter the cache.
	            self._enrichment_cache[persona.user_id] = {
	                "tone": persona.tone,
	                "preferred_themes": list(persona.preferred_themes),
	                "common_complaints": list(persona.common_complaints),
	                "voice_one_liner": persona.voice_one_liner,
	            }
	        except Exception as e:
	            # Deliberate best-effort: any failure degrades to the fallback
	            # below rather than aborting persona construction.
	            log.warning(
	                "LLM enrichment failed for %s (%s); using deterministic fallback",
	                persona.user_id, type(e).__name__,
	            )

	        # Fill whatever is still blank (LLM failure or partially empty summary).
	        return self._apply_deterministic_fallback(persona)

	    @staticmethod
	    def _apply_deterministic_fallback(persona: UserPersona) -> UserPersona:
	        """Fill in tone/themes/voice from quantitative signals when LLM fails.

	        This isn't as rich as an LLM summary, but it guarantees downstream
	        query construction has SOMETHING to work with — much better than
	        an empty string. Only fields that are currently empty are written;
	        ``common_complaints`` is never touched.

	        Args:
	            persona: Persona to complete; it is modified in place.

	        Returns:
	            The same ``persona`` object.
	        """
	        # Tone bucket from avg rating
	        if persona.avg_rating >= 4.5:
	            bucket_tone = "enthusiastic"
	        elif persona.avg_rating >= 3.8:
	            bucket_tone = "earnest"
	        elif persona.avg_rating >= 3.0:
	            bucket_tone = "measured"
	        else:
	            bucket_tone = "critical"

	        # Use top distinctive terms as proxy themes (filter out true generics)
	        generic_terms = {"book", "read", "story", "movie", "film", "great", "good",
	                         "really", "much", "first", "next", "through", "about"}
	        candidate_themes = [t for t in persona.top_terms if t not in generic_terms][:5]
	        themes = candidate_themes or persona.top_terms[:3]

	        # The voice sentence must agree with the tone the persona will end up
	        # with, so an already-set tone wins over the rating bucket.
	        tone = persona.tone or bucket_tone
	        # Simple spelling-based article choice ("An earnest", "A measured").
	        article = "An" if tone[0].lower() in "aeiou" else "A"

	        # Domain-grounded voice
	        domain_str = "/".join(persona.domains) if persona.domains else "general"
	        if persona.avg_review_length < 30:
	            length_descriptor = "writes brief reviews"
	        elif persona.avg_review_length > 150:
	            length_descriptor = "writes detailed reviews"
	        else:
	            length_descriptor = "writes moderate-length reviews"
	        voice = (
	            f"{article} {tone} {domain_str} reviewer who {length_descriptor} "
	            f"(avg {persona.avg_rating:.1f}★ over {persona.n_reviews} reviews)."
	        )

	        if not persona.tone:
	            persona.tone = tone
	        if not persona.preferred_themes:
	            persona.preferred_themes = themes
	        if not persona.voice_one_liner:
	            persona.voice_one_liner = voice
	        return persona