Israelbliz committed on
Commit
72a7c36
·
verified ·
1 Parent(s): ad45209

Delete core/persona.py

Browse files
Files changed (1) hide show
  1. core/persona.py +0 -291
core/persona.py DELETED
@@ -1,291 +0,0 @@
1
- """Persona engine — turn a user's review history into a behavioral fingerprint.
2
-
3
- The persona is the spine of the whole system. Both tasks ask it different
4
- questions:
5
-
6
- Task A: "Given this persona and this item, how would the user rate and review it?"
7
- Task B: "Given this persona, what items would the user want next?"
8
-
9
- A persona has two layers:
10
-
11
- 1. Quantitative signals (computed deterministically from history)
12
- - rating cadence: mean, std, distribution shape
13
- - review length: mean, std
14
- - vocabulary fingerprint: top distinctive terms
15
- - domain mix: which categories the user engages with
16
- - verified-purchase rate, helpful-vote signal
17
-
18
- 2. Qualitative summary (LLM-generated, cached)
19
- - tone descriptor (snarky / earnest / analytical / casual / ...)
20
- - common preferences (themes, styles)
21
- - common complaints (deal-breakers)
22
- - recommended audience for THIS user (one-liner persona pitch)
23
-
24
- The qualitative layer is what makes generated reviews feel like the actual
25
- user wrote them. Without it, you get generic LLM prose. With it, you get
26
- behavioral fidelity — which is one of Task A's three scored axes.
27
- """
28
- from __future__ import annotations
29
-
30
- import logging
31
- from collections import Counter
32
- from dataclasses import dataclass, field, asdict
33
- from typing import Any
34
-
35
- import pandas as pd
36
- from pydantic import BaseModel, Field
37
-
38
- from core.llm import LLMClient
39
-
40
- log = logging.getLogger(__name__)
41
-
42
-
43
- # ──────────────────────────────────────────────────────────────────────────────
44
- # Schemas
45
- # ──────────────────────────────────────────────────────────────────────────────
46
-
47
class QualitativeSummary(BaseModel):
    """LLM-generated qualitative layer of a persona."""

    # The description strings are runtime text handed to ``Field``; they are
    # only wrapped across lines here, never reworded.
    # NOTE(review): presumably these descriptions end up in the schema shown
    # to the LLM by ``LLMClient.structured`` — confirm before editing them.

    # Single-word label for how the user sounds.
    tone: str = Field(
        description=(
            "One-word tone descriptor: snarky, earnest, analytical, casual, "
            "enthusiastic, terse, verbose, etc."
        ),
    )

    # What the user tends to like.
    preferred_themes: list[str] = Field(
        description="3-5 themes/styles/qualities this user gravitates toward",
    )

    # What the user tends to criticise.
    common_complaints: list[str] = Field(
        description="2-4 recurring deal-breakers or critique patterns",
    )

    # One-sentence pitch of the user's reviewing voice.
    voice_one_liner: str = Field(
        description=(
            "A single sentence describing this user's reviewing voice "
            "as if pitching them to a casting director"
        ),
    )
53
-
54
-
55
@dataclass
class UserPersona:
    """A user's behavioral fingerprint: numeric signals, qualitative traits, and sample reviews.

    The quantitative fields are required at construction time. The qualitative
    fields default to empty values and are meant to be filled in later
    (see ``PersonaEngine.enrich``).
    """
    user_id: str

    # ── Quantitative signals ──
    n_reviews: int
    avg_rating: float
    std_rating: float
    avg_review_length: float
    std_review_length: float
    verified_rate: float
    domains: list[str]
    n_domains: int
    rating_distribution: dict[int, float]   # star value -> share, e.g. {1: 0.05, ..., 5: 0.4}
    top_terms: list[str]                    # vocabulary fingerprint

    # ── Qualitative traits (empty until enriched) ──
    tone: str = ""
    preferred_themes: list[str] = field(default_factory=list)
    common_complaints: list[str] = field(default_factory=list)
    voice_one_liner: str = ""

    # ── Subset of the user's reviews kept for retrieval / grounding ──
    history_samples: list[dict[str, Any]] = field(default_factory=list)

    def to_prompt_block(self) -> str:
        """Format the persona as the text section that is placed into LLM prompts.

        Returns:
            A multi-line string, one signal per line, terminated by a newline.
            Empty qualitative fields are rendered as ``unspecified``; only the
            first 15 vocabulary terms are included.
        """
        # Star histogram in ascending star order, e.g. "4★:50% 5★:50%".
        histogram = " ".join(
            f"{stars}★:{share:.0%}"
            for stars, share in sorted(self.rating_distribution.items())
        )
        themes = ", ".join(self.preferred_themes) or "unspecified"
        complaints = ", ".join(self.common_complaints) or "unspecified"
        tone = self.tone or "unspecified"
        voice = self.voice_one_liner or "unspecified"

        rows = [
            "USER PERSONA",
            f" Reviews written: {self.n_reviews}",
            f" Avg rating: {self.avg_rating:.2f} (±{self.std_rating:.2f})",
            f" Rating distribution: {histogram}",
            f" Avg review length: {self.avg_review_length:.0f} words (±{self.std_review_length:.0f})",
            f" Verified-purchase rate: {self.verified_rate:.0%}",
            f" Active domains: {', '.join(self.domains)}",
            f" Vocabulary fingerprint: {', '.join(self.top_terms[:15])}",
            f" Tone: {tone}",
            f" Preferred themes: {themes}",
            f" Common complaints: {complaints}",
            f" Voice: {voice}",
        ]
        # Every row, including the last, ends with a newline.
        return "".join(row + "\n" for row in rows)

    def as_dict(self) -> dict:
        """Return the persona as a plain dict (recursive copy via ``dataclasses.asdict``)."""
        snapshot = asdict(self)
        return snapshot
106
-
107
-
108
- # ──────────────────────────────────────────────────────────────────────────────
109
- # Engine
110
- # ──────────────────────────────────────────────────────────────────────────────
111
-
112
- # A small set of generic English stopwords + Amazon-review noise. Keeping
113
- # this in-module avoids pulling in nltk's download flow.
114
- _STOPWORDS = set("""
115
- a an the and or but if then else when while of in on at by to for with from
116
- into onto over under is are was were be been being have has had do does did
117
- i you he she it we they me him her us them my your his its our their this
118
- that these those there here what which who whom whose how why so as too very
119
- just also more most some any all each every other another such no not nor only
120
- own same can will would could should might may must one two three really get
121
- got gets just like dont didnt isnt arent wasnt werent havent hadnt hasnt cant
122
- couldnt wouldnt shouldnt wont thats whats theres heres ive ill ive youve im
123
- """.split())
124
-
125
-
126
class PersonaEngine:
    """Build personas from review history.

    Two entry points:
        from_dataframe(user_id, training_reviews_df) -> UserPersona
        enrich(persona) -> UserPersona   # adds qualitative summary via LLM
    """

    # Punctuation trimmed from both ends of a token before it is counted.
    _TOKEN_STRIP_CHARS = ".,!?\"'()[]{}:;"

    def __init__(self, llm: LLMClient | None = None,
                 top_terms_k: int = 20,
                 history_samples_k: int = 8):
        """Configure the engine.

        Args:
            llm: Client used by ``enrich``; a default ``LLMClient()`` is
                created when omitted.
            top_terms_k: Number of vocabulary terms kept per persona.
            history_samples_k: Number of most-recent reviews kept as samples.
        """
        self.llm = llm or LLMClient()
        self.top_terms_k = top_terms_k
        self.history_samples_k = history_samples_k

    # ─────────────────────────── Quantitative ────────────────────────────
    @staticmethod
    def _clean_text(value: Any) -> str:
        """Return ``value`` if it is a string, otherwise an empty string.

        Missing review text arrives from pandas as NaN/None; treating it as
        empty keeps slicing and tokenising from raising.
        """
        return value if isinstance(value, str) else ""

    def from_dataframe(self, user_id: str,
                       reviews: pd.DataFrame) -> UserPersona:
        """Build a UserPersona from a DataFrame of training reviews.

        Expected columns: user_id, parent_asin, rating, text, verified_purchase,
        domain, timestamp.

        Args:
            user_id: The user whose rows are selected from ``reviews``.
            reviews: Review rows; rows belonging to other users are ignored.

        Returns:
            A persona with the quantitative fields and history samples set.
            The qualitative fields keep their empty defaults.

        Raises:
            ValueError: If ``reviews`` contains no row for ``user_id``.
        """
        user_reviews = reviews[reviews["user_id"] == user_id]
        if user_reviews.empty:
            raise ValueError(f"No reviews found for user_id={user_id!r}")

        ratings = user_reviews["rating"].astype(float)
        # Normalise text once so every consumer below sees real strings.
        texts = user_reviews["text"].map(self._clean_text)
        lengths = texts.str.split().str.len()

        # Rating distribution as proportions. NaN ratings cannot be cast to
        # int, so they are left out of the histogram (mean/std skip them too).
        star_shares = (
            ratings.dropna().round().astype(int).value_counts(normalize=True)
        )
        rating_dist = {int(k): float(v) for k, v in star_shares.items()}

        # Vocabulary fingerprint: most common non-stopword tokens
        top_terms = self._top_terms(texts.tolist())

        # Sample history items for retrieval grounding — keep the most recent
        history = (
            user_reviews.sort_values("timestamp", ascending=False)
            .head(self.history_samples_k)
        )
        history_samples = [
            {
                "parent_asin": row["parent_asin"],
                "rating": float(row["rating"]),
                # Missing text becomes "" instead of raising on the slice.
                "text": self._clean_text(row["text"])[:500],
                "domain": row["domain"],
            }
            for _, row in history.iterrows()
        ]

        return UserPersona(
            user_id=user_id,
            n_reviews=len(user_reviews),
            avg_rating=float(ratings.mean()),
            # Sample std is undefined (NaN) for a single review; report 0.0.
            std_rating=float(ratings.std()) if len(ratings) > 1 else 0.0,
            avg_review_length=float(lengths.mean()),
            std_review_length=float(lengths.std()) if len(lengths) > 1 else 0.0,
            verified_rate=float(user_reviews["verified_purchase"].mean()),
            domains=sorted(user_reviews["domain"].unique().tolist()),
            n_domains=int(user_reviews["domain"].nunique()),
            rating_distribution=rating_dist,
            top_terms=top_terms,
            history_samples=history_samples,
        )

    def _top_terms(self, texts: list[str]) -> list[str]:
        """Most frequent content tokens, stopwords removed.

        Tokens are lower-cased and stripped of surrounding punctuation. Only
        purely alphabetic tokens longer than two characters are counted.
        Non-string entries in ``texts`` are skipped.

        Returns:
            Up to ``self.top_terms_k`` terms, most frequent first.
        """
        counter: Counter = Counter()
        for txt in texts:
            if not isinstance(txt, str):
                continue
            tokens = (t.lower().strip(self._TOKEN_STRIP_CHARS) for t in txt.split())
            counter.update(
                t for t in tokens
                if len(t) > 2 and t not in _STOPWORDS and t.isalpha()
            )
        return [w for w, _ in counter.most_common(self.top_terms_k)]

    # ─────────────────────────── Qualitative ─────────────────────────────
    def enrich(self, persona: UserPersona) -> UserPersona:
        """Add LLM-generated qualitative summary to an existing persona.

        Requests a ``QualitativeSummary`` from ``self.llm.structured`` using the
        "reasoning" model. Whatever the outcome — no samples, an LLM error, or
        an LLM answer with blank fields — the deterministic fallback runs last
        and fills only the fields that are still empty, so tone, themes and
        voice are never left blank.

        Args:
            persona: Persona to enrich; it is modified in place.

        Returns:
            The same ``persona`` object.
        """
        if not persona.history_samples:
            log.warning("User %s has no history samples; skipping enrichment",
                        persona.user_id)
            return self._apply_deterministic_fallback(persona)

        sample_block = "\n\n".join(
            f"[{i+1}] Rating: {s['rating']}★ Domain: {s['domain']}\n"
            f"{self._clean_text(s['text'])[:400]}"
            for i, s in enumerate(persona.history_samples)
        )

        prompt = (
            f"Below are review samples from a single user. Read them carefully "
            f"and infer their reviewing voice.\n\n"
            f"{sample_block}\n\n"
            f"Quantitative signals about this user:\n"
            f"- Average rating: {persona.avg_rating:.2f} of 5\n"
            f"- Average review length: {persona.avg_review_length:.0f} words\n"
            f"- Vocabulary they use often: {', '.join(persona.top_terms[:15])}\n\n"
            f"Produce a qualitative summary of their reviewer voice. "
            f"Be concise and concrete. If the samples are too sparse or generic, "
            f"infer the most plausible voice rather than refusing."
        )

        try:
            summary = self.llm.structured(
                prompt, QualitativeSummary, model="reasoning",
                system="You are a behavioral analyst specializing in online review patterns. Always produce valid output.",
            )
            # A blank field in the summary never overwrites an existing value.
            persona.tone = summary.tone or persona.tone
            persona.preferred_themes = summary.preferred_themes or persona.preferred_themes
            persona.common_complaints = summary.common_complaints or persona.common_complaints
            persona.voice_one_liner = summary.voice_one_liner or persona.voice_one_liner
        except Exception as e:
            # Deliberately broad: enrichment is best-effort and must not abort
            # persona construction, whatever the client raises.
            log.warning("LLM enrichment failed for %s (%s); using deterministic fallback",
                        persona.user_id, type(e).__name__)

        # Only fills fields that are still empty, so a complete LLM summary is
        # left untouched.
        return self._apply_deterministic_fallback(persona)

    @staticmethod
    def _apply_deterministic_fallback(persona: UserPersona) -> UserPersona:
        """Fill in tone/themes/voice from quantitative signals when LLM fails.

        This isn't as rich as an LLM summary, but it guarantees downstream
        query construction has SOMETHING to work with — much better than
        an empty string. Fields that already hold a value are left alone, and
        ``common_complaints`` is never touched.

        Args:
            persona: Persona to complete; it is modified in place.

        Returns:
            The same ``persona`` object.
        """
        # Tone bucket from avg rating
        if persona.avg_rating >= 4.5:
            tone = "enthusiastic"
        elif persona.avg_rating >= 3.8:
            tone = "earnest"
        elif persona.avg_rating >= 3.0:
            tone = "measured"
        else:
            tone = "critical"

        # Use top distinctive terms as proxy themes (filter out true generics)
        generic_terms = {"book", "read", "story", "movie", "film", "great", "good",
                         "really", "much", "first", "next", "through", "about"}
        candidate_themes = [t for t in persona.top_terms if t not in generic_terms][:5]
        # If every term was generic, generic terms still beat an empty list.
        themes = candidate_themes or persona.top_terms[:3]

        # Domain-grounded voice
        domain_str = "/".join(persona.domains) if persona.domains else "general"
        if persona.avg_review_length < 30:
            length_descriptor = "writes brief reviews"
        elif persona.avg_review_length > 150:
            length_descriptor = "writes detailed reviews"
        else:
            length_descriptor = "writes moderate-length reviews"
        voice = (
            f"A {tone} {domain_str} reviewer who {length_descriptor} "
            f"(avg {persona.avg_rating:.1f}★ over {persona.n_reviews} reviews)."
        )

        if not persona.tone:
            persona.tone = tone
        if not persona.preferred_themes:
            persona.preferred_themes = themes
        if not persona.voice_one_liner:
            persona.voice_one_liner = voice
        return persona