Israelbliz commited on
Commit
1539e17
·
verified ·
1 Parent(s): bad8292

Upload task_a_user_modeling

Browse files
task_a_user_modeling/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Task A — User Modeling.
2
+
3
+ Given a user persona and product details, generate a rating + review that
4
+ match the user's behavioral fingerprint.
5
+ """
task_a_user_modeling/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (314 Bytes). View file
 
task_a_user_modeling/__pycache__/agent.cpython-313.pyc ADDED
Binary file (13.7 kB). View file
 
task_a_user_modeling/agent.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task A agent — the Impersonator.
2
+
3
+ Given a UserPersona and an item (title, description, categories, domain),
4
+ produce a predicted rating and a generated review that match the user's
5
+ behavioral voice.
6
+
7
+ The workflow is a deterministic 4-step pipeline:
8
+
9
+ 1. select_similar_history(persona, item)
10
+ → pick the 3 most similar past reviews from the persona's history
11
+ → "similar" means same domain when possible, else any
12
+ → these ground the generation in the user's actual writing samples
13
+ 2. build_prompt(persona, item, similar_history)
14
+ → render the persona + similar reviews + item into a structured prompt
15
+ → the prompt is what the LLM sees
16
+ 3. llm.structured(prompt, GeneratedReview)
17
+ → call GPT-4o (reasoning tier) and parse into a Pydantic schema
18
+ → schema enforces (rating: float, review: str, reasoning: str)
19
+ 4. postprocess(output, persona)
20
+ → clamp rating to 1-5
21
+ → if naija_mode is on, run the review through the style layer
22
+
23
+ The reasoning field is mandatory and exposed in the API response. This is
24
+ how the system demonstrates "intelligence per feature" — every generated
25
+ review comes with a sentence explaining why this rating, grounded in the
26
+ persona's signals.
27
+ """
28
+ from __future__ import annotations
29
+
30
+ import logging
31
+ from dataclasses import dataclass, field
32
+ from typing import Optional
33
+
34
+ from pydantic import BaseModel, Field
35
+
36
+ from core.llm import LLMClient
37
+ from core.persona import UserPersona
38
+ from core.nigerian import naija_style_review
39
+ from core.reflection import reflect_on_review, ReflectionTrace
40
+
41
+ log = logging.getLogger(__name__)
42
+
43
+
44
+ # ──────────────────────────────────────────────────────────────────────────────
45
+ # Schemas
46
+ # ──────────────────────────────────────────────────────────────────────────────
47
+
48
class ItemInput(BaseModel):
    """Details of the product the Impersonator is asked to review."""

    # Required identification and display text.
    parent_asin: str = Field(description="Item ID")
    title: str = Field(description="Item title")
    # Optional free text; both fall back to the empty string when omitted.
    description: str = Field("", description="Item description / synopsis")
    categories: str = Field("", description="Category breadcrumbs")
    domain: str = Field(description="Books / Movies_and_TV / Kindle_Store")
    # None means no crowd average was supplied.
    average_rating: Optional[float] = Field(
        None,
        description="Crowd average rating, if known",
    )
56
+
57
+
58
class GeneratedReview(BaseModel):
    """Schema the LLM's structured answer is parsed into."""

    # Numeric star rating requested from the model.
    rating: float = Field(
        description="Star rating, 1.0 to 5.0, half-stars allowed",
    )
    # Review body written in the simulated user's voice.
    review: str = Field(
        description="The full review text in this user's voice",
    )
    # Short justification tying the rating back to the persona.
    reasoning: str = Field(
        description="One-sentence justification grounded in the user's persona signals",
    )
63
+
64
+
65
@dataclass
class ImpersonationResult:
    """Everything the agent hands back for a single impersonation run."""

    rating: float
    review: str
    reasoning: str
    # Number of past reviews that informed the generation.
    used_history_count: int
    naija_mode: bool

    # --- Self-reflection metadata (Stage 3b) ---
    # How many critique cycles ran.
    reflection_iterations: int = 0
    # Whether the review was revised.
    reflection_refined: bool = False
    # Critique findings; a fresh list is created per instance.
    reflection_notes: list[str] = field(default_factory=list)
77
+
78
+
79
+ # ──────────────────────────────────────────────────────────────────────────────
80
+ # Workflow steps
81
+ # ──────────────────────────────────────────────────────────────────────────────
82
+
83
def select_similar_history(persona: UserPersona, item: ItemInput,
                           k: int = 3) -> list[dict]:
    """Pick up to ``k`` past reviews to ground the generation.

    Preference order:
      1. samples whose ``"domain"`` equals ``item.domain``
      2. samples from any other domain (fallback, used only to fill up to ``k``)

    Within each group the original order of ``persona.history_samples`` is
    kept. That list is assumed to be sorted most-recent-first by the persona
    builder (TODO confirm), so the earliest entries win.

    Args:
        persona: Object exposing ``history_samples``, a list of dicts.
        item: Object exposing ``domain``.
        k: Maximum number of samples to return. Zero or a negative value
            yields an empty list.

    Returns:
        A new list of at most ``k`` sample dicts, same-domain ones first.
    """
    samples = persona.history_samples
    # Guard k <= 0 explicitly: a negative k would otherwise act as a slice
    # bound and return "all but the last |k|" same-domain samples.
    if k <= 0 or not samples:
        return []

    same_domain: list[dict] = []
    other_domain: list[dict] = []
    for sample in samples:
        # .get() so a sample without a "domain" key counts as other-domain
        # instead of raising KeyError.
        if sample.get("domain") == item.domain:
            same_domain.append(sample)
        else:
            other_domain.append(sample)

    # Same-domain samples first, then fill the remainder from other domains.
    return (same_domain + other_domain)[:k]
103
+
104
+
105
def build_prompt(persona: UserPersona, item: ItemInput,
                 similar_history: list[dict]) -> str:
    """Render the impersonation prompt sent to the LLM.

    Sections, in order:
      - THE USER YOU ARE SIMULATING: ``persona.to_prompt_block()``
      - ACTUAL REVIEWS THIS USER WROTE: only when ``similar_history`` is
        non-empty; each sample's text is cut to 600 characters
      - NEW ITEM TO REVIEW: domain and title always; categories, description
        (cut to 800 characters) and crowd average only when truthy
      - YOUR TASK: fixed instructions for rating, review and reasoning

    Args:
        persona: Object exposing ``to_prompt_block()``.
        item: Object exposing ``domain``, ``title``, ``categories``,
            ``description`` and ``average_rating``.
        similar_history: Dicts with ``"rating"``, ``"domain"`` and ``"text"``
            keys.

    Returns:
        The prompt as one newline-joined string.
    """
    rule = "=" * 60

    def banner(title: str, *, gap: bool = False) -> list[str]:
        """Return the rule/title/rule lines; ``gap`` prefixes a blank line."""
        return [("\n" + rule) if gap else rule, title, rule]

    parts = ["You are simulating a real Amazon reviewer. Generate a review that authentically reflects their voice, rating tendencies, and behavioral patterns.\n"]

    parts.extend(banner("THE USER YOU ARE SIMULATING"))
    parts.append(persona.to_prompt_block())

    if similar_history:
        parts.extend(banner("ACTUAL REVIEWS THIS USER WROTE (study the voice carefully)"))
        for i, h in enumerate(similar_history, 1):
            parts.append(f"\n[Sample {i}] {h['rating']}★ in {h['domain']}:")
            # Cap each sample so one long review cannot dominate the prompt.
            parts.append(h["text"][:600])

    parts.extend(banner("NEW ITEM TO REVIEW", gap=True))
    parts.append(f"Domain: {item.domain}")
    parts.append(f"Title: {item.title}")
    if item.categories:
        parts.append(f"Categories: {item.categories}")
    if item.description:
        parts.append(f"Description: {item.description[:800]}")
    # Truthiness check: both None and 0.0 are treated as "no crowd average".
    if item.average_rating:
        parts.append(f"Crowd average: {item.average_rating:.1f}★")

    parts.extend(banner("YOUR TASK", gap=True))
    parts.append(
        "Produce three things.\n\n"
        "1. A RATING from 1.0 to 5.0. Predict it in TWO explicit steps:\n"
        " Step A — The PRIOR: what does this user usually give? Look at their\n"
        " rating distribution and average. This is your starting point.\n"
        " Step B — The ITEM EVIDENCE: now read the NEW ITEM carefully. The\n"
        " title, description, and any crowd average carry signal about\n"
        " whether THIS specific item is a hit or a miss FOR THIS USER.\n"
        " - A title or description with negative/lukewarm language\n"
        " (e.g. 'capable of better', 'lost than found', 'disappointing')\n"
        " pulls the rating DOWN — even for a generous user.\n"
        " - Rich, substantive material that fits the user's stated tastes\n"
        " pulls the rating UP — even for a critical user. A critical\n"
        " reviewer still gives 4-5★ to things that genuinely engage them.\n"
        " - Do not assume 'critical tone' means the user dislikes things;\n"
        " critical users rate highly when the material rewards their\n"
        " attention. Do not assume a generous user gives 5★ to\n"
        " everything; they still give 4★ to mild disappointments.\n"
        " Final rating = the PRIOR adjusted by the ITEM EVIDENCE. If the\n"
        " item evidence is neutral or absent, stay near the prior. If the\n"
        " item evidence clearly points somewhere, MOVE toward it.\n\n"
        "2. A REVIEW in this user's voice — match their length, tone,\n"
        " vocabulary, and quirks visible in their writing samples\n"
        " (capitalization, sentence structure, how they signal approval or\n"
        " disapproval). The review's sentiment MUST be consistent with the\n"
        " rating you chose.\n\n"
        "3. A one-sentence REASONING explaining the rating. It MUST cite BOTH\n"
        " (a) the persona prior AND (b) the specific item evidence that\n"
        " adjusted it — e.g. 'This user averages 4.8★, but the title signals\n"
        " \"capable of better\", a mild letdown, so 4★ not 5★.'"
    )

    return "\n".join(parts)
180
+
181
+
182
def postprocess(output: GeneratedReview, persona: UserPersona,
                naija_mode: bool, llm: LLMClient) -> GeneratedReview:
    """Normalise the rating and optionally restyle the review text.

    The rating is limited to the range 1.0-5.0 and then rounded to a multiple
    of 0.5. The review is stripped of surrounding whitespace; when
    ``naija_mode`` is set and the stripped review is non-empty, it is passed
    through ``naija_style_review``. If that call raises, a warning is logged
    and the stripped review is kept. ``persona`` is accepted but not read.

    Returns:
        A new ``GeneratedReview`` carrying the adjusted rating, the final
        review text and the unchanged reasoning.
    """
    # Limit to [1.0, 5.0], then snap to the half-star grid.
    bounded = max(1.0, min(5.0, output.rating))
    half_star = round(bounded * 2) / 2

    text = output.review.strip()
    wants_restyle = bool(naija_mode and text)
    if wants_restyle:
        try:
            text = naija_style_review(text, llm=llm)
        except Exception as e:
            # Best effort: a failed style transfer must not lose the review.
            log.warning(f"Naija style transfer failed; returning original. ({e})")

    return GeneratedReview(
        rating=half_star,
        review=text,
        reasoning=output.reasoning,
    )
197
+
198
+
199
+ # ──────────────────────────────────────────────────────────────────────────────
200
+ # Agent
201
+ # ──────────────────────────────────────────────────────────────────────────────
202
+
203
class ImpersonationAgent:
    """The Task A agent: generates a rating and review as a given persona.

    Usage:
        agent = ImpersonationAgent()
        result = agent.run(persona, item, naija_mode=False)
        # result.rating, result.review, result.reasoning
    """

    # System message sent with every generation call.
    _SYSTEM_PROMPT = "You are an expert behavioral simulator. You write reviews exactly as the specified user would write them, matching their tone, length, rating patterns, and quirks."

    def __init__(self, llm: LLMClient | None = None,
                 history_samples_k: int = 3,
                 use_reflection: bool = True,
                 reflection_max_iterations: int = 2):
        """Store configuration; a default ``LLMClient`` is built when ``llm`` is falsy."""
        self.llm = llm or LLMClient()
        self.history_samples_k = history_samples_k
        self.use_reflection = use_reflection
        self.reflection_max_iterations = reflection_max_iterations

    def run(self, persona: UserPersona, item: ItemInput,
            naija_mode: bool = False) -> ImpersonationResult:
        """Run the full pipeline for one persona/item pair.

        Steps: select grounding history, build the prompt, call the LLM for a
        structured ``GeneratedReview``, optionally run self-reflection, then
        postprocess. The reasoning always comes from the first LLM answer,
        even when reflection changes the rating or review.
        """
        # Step 1: select grounding history
        grounding = select_similar_history(persona, item, k=self.history_samples_k)
        log.info(f"Selected {len(grounding)} similar history items for grounding")

        # Step 2: build prompt
        prompt = build_prompt(persona, item, grounding)

        # Step 3: LLM call with structured output
        log.info(f"Calling LLM for impersonation of user {persona.user_id} on item {item.parent_asin}")
        draft = self.llm.structured(
            prompt,
            schema=GeneratedReview,
            model="reasoning",
            system=self._SYSTEM_PROMPT,
        )

        # Step 4: self-reflection: critique + refine (Stage 3b)
        rating, review = draft.rating, draft.review
        iterations = 0
        refined = False
        notes: list[str] = []
        if self.use_reflection:
            log.info("Running self-reflection on generated review")
            rating, review, trace = reflect_on_review(
                self.llm, persona,
                item_title=item.title, item_domain=item.domain,
                rating=rating, review=review,
                max_iterations=self.reflection_max_iterations,
            )
            iterations = trace.iterations_run
            refined = trace.refined
            notes = list(trace.critiques)

        candidate = GeneratedReview(
            rating=rating, review=review, reasoning=draft.reasoning,
        )

        # Step 5: postprocess (clamp rating, optional naija style)
        final = postprocess(candidate, persona, naija_mode=naija_mode, llm=self.llm)

        return ImpersonationResult(
            rating=final.rating,
            review=final.review,
            reasoning=final.reasoning,
            used_history_count=len(grounding),
            naija_mode=naija_mode,
            reflection_iterations=iterations,
            reflection_refined=refined,
            reflection_notes=notes,
        )
task_a_user_modeling/main.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task A service — FastAPI wrapper around the Impersonation agent.
2
+
3
+ Exposes:
4
+ POST /generate
5
+ Body: { user_id: str | persona: {...}, item: {...}, naija_mode: bool }
6
+ Returns: { rating, review, reasoning, used_history_count, naija_mode }
7
+
8
+ GET /health
9
+ Returns: { status: "ok" }
10
+
11
+ GET /users (helper)
12
+ Returns: list of sample user_ids the judges can try
13
+
14
+ Run locally:
15
+ uvicorn task_a_user_modeling.main:app --host 0.0.0.0 --port 8001 --reload
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ from functools import lru_cache
21
+ from typing import Optional
22
+
23
+ import pandas as pd
24
+ from fastapi import FastAPI, HTTPException
25
+ from pydantic import BaseModel, Field
26
+
27
+ from core.config import settings
28
+ from core.llm import LLMClient
29
+ from core.persona import PersonaEngine, UserPersona
30
+ from task_a_user_modeling.agent import (
31
+ ImpersonationAgent, ItemInput, ImpersonationResult,
32
+ )
33
+
34
# Root logging is configured at import time; INFO and above is emitted.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger(__name__)

# ASGI application object; endpoints below register themselves on it.
app = FastAPI(
    title="NaijaTaste AI — Task A: Behavioral Review Simulator",
    description=(
        "Generates ratings and reviews in the voice of a specific user, given "
        "their review history and a target item. Optional Nigerian English mode."
    ),
    version="0.1.0",
)
45
+
46
+
47
+ # ──────────────────────────────────────────────────────────────────────────────
48
+ # Lazy-loaded resources
49
+ # ──────────────────────────────────────────────────────────────────────────────
50
+
51
@lru_cache(maxsize=1)
def _load_reviews() -> pd.DataFrame:
    """Read ``reviews.parquet`` from ``settings.processed_dir``.

    The result is memoised, so the file is read once per process.

    Raises:
        RuntimeError: If the parquet file does not exist.
    """
    parquet_file = settings.processed_dir / "reviews.parquet"
    if parquet_file.exists():
        frame = pd.read_parquet(parquet_file)
        log.info(f"Loaded {len(frame):,} reviews from {parquet_file}")
        return frame
    raise RuntimeError(
        f"Reviews file not found at {parquet_file}. Run `python data/prepare_data.py` first."
    )
61
+
62
+
63
@lru_cache(maxsize=1)
def _persona_engine() -> PersonaEngine:
    """Return the process-wide ``PersonaEngine``, created on first use."""
    engine = PersonaEngine()
    return engine
66
+
67
+
68
@lru_cache(maxsize=1)
def _agent() -> ImpersonationAgent:
    """Return the process-wide ``ImpersonationAgent``, created on first use."""
    agent = ImpersonationAgent()
    return agent
71
+
72
+
73
@lru_cache(maxsize=512)
def _build_persona(user_id: str, enrich: bool = True) -> UserPersona:
    """Build a persona for ``user_id`` from the rows whose split is "train".

    When ``enrich`` is true and the persona has history samples, the persona
    is additionally passed through ``engine.enrich``. Results are memoised
    per ``(user_id, enrich)`` pair (up to 512 entries), so repeated calls for
    the same user do not rebuild it.
    """
    all_reviews = _load_reviews()
    train_only = all_reviews[all_reviews["split"] == "train"]
    engine = _persona_engine()
    persona = engine.from_dataframe(user_id, train_only)
    # Nothing to enrich without history samples.
    if not enrich or not persona.history_samples:
        return persona
    return engine.enrich(persona)
84
+
85
+
86
+ # ──────────────────────────────────────────────────────────────────────────────
87
+ # Request / response schemas
88
+ # ──────────────────────────────────────────────────────────────────────────────
89
+
90
class PersonaInput(BaseModel):
    """Persona described directly by the caller, as an alternative to a user_id."""

    description: str = Field(
        description="Free-text description of the user (cold-start friendly)",
    )
    # Validated to lie within 1.0-5.0.
    avg_rating: float = Field(4.0, ge=1.0, le=5.0)
    # Validated to lie within 10-2000.
    avg_review_length: int = Field(80, ge=10, le=2000)
    # Both lists default to a fresh empty list per instance.
    preferred_themes: list[str] = Field(default_factory=list)
    common_complaints: list[str] = Field(default_factory=list)
99
+
100
+
101
class GenerateRequest(BaseModel):
    """Body of ``POST /generate``.

    ``user_id`` and ``persona`` are both optional at the schema level; the
    endpoint itself rejects requests that set both or neither.
    """

    user_id: Optional[str] = Field(
        default=None,
        description="If set, the persona is built from this user's review history",
    )
    persona: Optional[PersonaInput] = Field(
        default=None,
        description="Alternative to user_id — supply a free-text persona for cold-start",
    )
    # The item to be reviewed; always required.
    item: ItemInput
    naija_mode: bool = Field(
        default=False,
        description="If true, rewrite the generated review in Nigerian English register",
    )
115
+
116
+
117
class GenerateResponse(BaseModel):
    """Body returned by ``POST /generate``."""

    # Generated outputs.
    rating: float
    review: str
    reasoning: str
    # Number of past reviews that grounded the generation.
    used_history_count: int
    # Echo of the request's naija_mode flag.
    naija_mode: bool
    # The user the persona was built from; None for free-text personas.
    user_id: Optional[str] = None
124
+
125
+
126
+ # ──────────────────────────────────────────────────────────────────────────────
127
+ # Endpoints
128
+ # ──────────────────────────────────────────────────────────────────────────────
129
+
130
@app.get("/health")
def health():
    """Liveness probe: report a fixed status and this service's name."""
    payload = dict(status="ok", service="task_a_user_modeling")
    return payload
133
+
134
+
135
@app.get("/users")
def list_users(limit: int = 20):
    """Return sample user_ids with rich histories, for judges to try.

    Only rows whose split is "train" are counted. Users who reviewed in fewer
    than two domains are excluded; of the rest, the ``limit`` users with the
    most reviews are returned.
    """
    all_reviews = _load_reviews()
    train_rows = all_reviews[all_reviews["split"] == "train"]

    # One row per user: review count, distinct domains, mean rating.
    per_user = train_rows.groupby("user_id").agg(
        n_reviews=("rating", "size"),
        n_domains=("domain", "nunique"),
        avg_rating=("rating", "mean"),
    )
    per_user = per_user.reset_index()

    # Keep cross-domain users only, most prolific first.
    cross_domain = per_user[per_user["n_domains"] >= 2]
    top = cross_domain.nlargest(limit, "n_reviews")

    users = []
    for _, row in top.iterrows():
        users.append({
            "user_id": row["user_id"],
            "n_reviews": int(row["n_reviews"]),
            "n_domains": int(row["n_domains"]),
            "avg_rating": round(float(row["avg_rating"]), 2),
        })
    return {"users": users}
158
+
159
+
160
def _cold_start_persona(spec: PersonaInput, domain: str) -> UserPersona:
    """Build a synthetic, history-less persona from a free-text description."""
    return UserPersona(
        user_id="custom_cold_start",
        n_reviews=0,
        avg_rating=spec.avg_rating,
        std_rating=0.5,
        avg_review_length=float(spec.avg_review_length),
        std_review_length=20.0,
        verified_rate=1.0,
        domains=[domain],
        n_domains=1,
        # All probability mass sits on the star nearest the stated average.
        rating_distribution={int(round(spec.avg_rating)): 1.0},
        top_terms=[],
        tone="",
        preferred_themes=spec.preferred_themes,
        common_complaints=spec.common_complaints,
        voice_one_liner=spec.description,
        history_samples=[],
    )


@app.post("/generate", response_model=GenerateResponse)
def generate(req: GenerateRequest):
    """Generate a rating + review impersonating the given user.

    Exactly one of ``req.user_id`` and ``req.persona`` must be provided.

    Raises:
        HTTPException: 400 when both or neither of ``user_id`` / ``persona``
            are given; 404 when building the persona for ``user_id`` raises
            ``ValueError``.
    """
    if req.user_id and req.persona:
        raise HTTPException(400, "Provide either user_id OR persona, not both")
    if not req.user_id and not req.persona:
        raise HTTPException(400, "Provide either user_id or persona")

    if req.user_id:
        try:
            persona = _build_persona(req.user_id, enrich=True)
        except ValueError as exc:
            # NOTE(review): any ValueError from persona building or enrichment
            # is reported as "not found"; confirm that is the only cause.
            raise HTTPException(
                404, f"user_id {req.user_id!r} not found in training data"
            ) from exc
        used_user_id = req.user_id
    else:
        # Build a synthetic persona from the free-text input
        persona = _cold_start_persona(req.persona, req.item.domain)
        used_user_id = None

    agent = _agent()
    result: ImpersonationResult = agent.run(persona, req.item, naija_mode=req.naija_mode)

    return GenerateResponse(
        rating=result.rating,
        review=result.review,
        reasoning=result.reasoning,
        used_history_count=result.used_history_count,
        naija_mode=result.naija_mode,
        user_id=used_user_id,
    )
207
+ )