Spaces:
Running
Running
| """Task A agent — the Impersonator. | |
| Given a UserPersona and an item (title, description, categories, domain), | |
| produce a predicted rating and a generated review that match the user's | |
| behavioral voice. | |
| The workflow is a deterministic 4-step pipeline: | |
| 1. select_similar_history(persona, item) | |
| → pick the 3 most similar past reviews from the persona's history | |
| → "similar" means same domain when possible, else any | |
| → these ground the generation in the user's actual writing samples | |
| 2. build_prompt(persona, item, similar_history) | |
| → render the persona + similar reviews + item into a structured prompt | |
| → the prompt is what the LLM sees | |
| 3. llm.structured(prompt, GeneratedReview) | |
| → call GPT-4o (reasoning tier) and parse into a Pydantic schema | |
| → schema enforces (rating: float, review: str, reasoning: str) | |
| 4. postprocess(output, persona) | |
| → clamp rating to 1-5 and snap it to the nearest half-star | |
| → if naija_mode is on, run the review through the style layer | |
| When reflection is enabled on the agent, an optional self-reflection pass | |
| (critique + refine) runs between steps 3 and 4. | |
| The reasoning field is mandatory and exposed in the API response. This is | |
| how the system demonstrates "intelligence per feature" — every generated | |
| review comes with a sentence explaining why this rating, grounded in the | |
| persona's signals. | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from pydantic import BaseModel, Field | |
| from core.llm import LLMClient | |
| from core.persona import UserPersona | |
| from core.nigerian import naija_style_review | |
| from core.reflection import reflect_on_review, ReflectionTrace | |
| log = logging.getLogger(__name__) | |
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Schemas | |
| # ────────────────────────────────────────────────────────────────────────────── | |
class ItemInput(BaseModel):
    """Describe the item the Impersonator is asked to review.

    ``parent_asin``, ``title`` and ``domain`` are required. The remaining
    fields are optional and default to an empty string or ``None``.
    """

    parent_asin: str = Field(description="Item ID")
    title: str = Field(description="Item title")
    description: str = Field(
        description="Item description / synopsis",
        default="",
    )
    categories: str = Field(
        description="Category breadcrumbs",
        default="",
    )
    domain: str = Field(description="Books / Movies_and_TV / Kindle_Store")
    average_rating: Optional[float] = Field(
        description="Crowd average rating, if known",
        default=None,
    )
class GeneratedReview(BaseModel):
    """Schema the LLM's structured response is parsed into.

    All three fields are required: a star rating, the review text, and a
    short justification for the rating.
    """

    rating: float = Field(
        description="Star rating, 1.0 to 5.0, half-stars allowed",
    )
    review: str = Field(
        description="The full review text in this user's voice",
    )
    reasoning: str = Field(
        description="One-sentence justification grounded in the user's persona signals",
    )
@dataclass
class ImpersonationResult:
    """Final output returned by the agent.

    This is a dataclass: the ``@dataclass`` decorator is required so that the
    annotated attributes below become keyword-constructible fields and so
    that ``field(default_factory=list)`` yields a fresh list per instance
    instead of leaving a raw ``Field`` object as a class attribute.
    """

    rating: float
    review: str
    reasoning: str
    used_history_count: int  # how many past reviews informed the generation
    naija_mode: bool
    # Self-reflection metadata (Stage 3b)
    reflection_iterations: int = 0  # how many critique cycles ran
    reflection_refined: bool = False  # whether the review was revised
    reflection_notes: list[str] = field(default_factory=list)  # critique findings
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Workflow steps | |
| # ────────────────────────────────────────────────────────────────────────────── | |
def select_similar_history(persona: UserPersona, item: ItemInput,
                           k: int = 3) -> list[dict]:
    """Pick up to ``k`` past reviews to ground the generation.

    Preference order:
        1. reviews whose ``"domain"`` equals ``item.domain``
        2. reviews from any other domain (fallback, only to fill up to ``k``)

    Within each group the input order of ``persona.history_samples`` is
    preserved. That order is assumed to be recency-descending, as produced
    by the persona builder (not verifiable from this function).

    Args:
        persona: Object exposing ``history_samples``, a sequence of dicts
            that each carry a ``"domain"`` key.
        item: Object exposing the target ``domain``.
        k: Maximum number of samples to return. Values ``<= 0`` yield an
            empty list (a negative ``k`` would otherwise be interpreted as
            a "drop the last |k| items" slice).

    Returns:
        A new list with at most ``k`` sample dicts; the persona's own list
        is never mutated.
    """
    samples = persona.history_samples
    if not samples or k <= 0:
        return []

    # Single pass: split the history into same-domain and other-domain
    # buckets while keeping the original relative order in each.
    same_domain: list[dict] = []
    other_domain: list[dict] = []
    for sample in samples:
        bucket = same_domain if sample["domain"] == item.domain else other_domain
        bucket.append(sample)

    # Same-domain samples come first; other domains only fill the remainder.
    return (same_domain + other_domain)[:k]
def build_prompt(persona: UserPersona, item: ItemInput,
                 similar_history: list[dict]) -> str:
    """Render the impersonation prompt.

    The prompt is made of an opening instruction followed by these sections:
        - THE USER YOU ARE SIMULATING: ``persona.to_prompt_block()``
        - ACTUAL REVIEWS THIS USER WROTE: one entry per sample in
          ``similar_history`` (section omitted when the list is empty)
        - NEW ITEM TO REVIEW: domain and title, plus categories, description
          and crowd average when they are set
        - YOUR TASK: the rating / review / reasoning instructions

    Args:
        persona: Object exposing ``to_prompt_block()``.
        item: Object exposing ``domain``, ``title``, ``categories``,
            ``description`` and ``average_rating``.
        similar_history: Sample dicts; each must carry ``"rating"``,
            ``"domain"`` and ``"text"`` keys.

    Returns:
        The prompt as a single newline-joined string.
    """
    rule = "=" * 60

    parts = ["You are simulating a real Amazon reviewer. Generate a review that authentically reflects their voice, rating tendencies, and behavioral patterns.\n"]
    parts.extend([rule, "THE USER YOU ARE SIMULATING", rule])
    parts.append(persona.to_prompt_block())

    if similar_history:
        parts.extend([
            rule,
            "ACTUAL REVIEWS THIS USER WROTE (study the voice carefully)",
            rule,
        ])
        for i, h in enumerate(similar_history, 1):
            parts.append(f"\n[Sample {i}] {h['rating']}★ in {h['domain']}:")
            # Cap each sample at 600 characters to keep the prompt bounded.
            parts.append(h["text"][:600])

    parts.extend(["\n" + rule, "NEW ITEM TO REVIEW", rule])
    parts.append(f"Domain: {item.domain}")
    parts.append(f"Title: {item.title}")
    if item.categories:
        parts.append(f"Categories: {item.categories}")
    if item.description:
        # Cap the description at 800 characters.
        parts.append(f"Description: {item.description[:800]}")
    # Truthiness check on purpose: both None and 0.0 are treated as
    # "no crowd average available" and the line is omitted.
    if item.average_rating:
        parts.append(f"Crowd average: {item.average_rating:.1f}★")

    parts.extend(["\n" + rule, "YOUR TASK", rule])
    parts.append(
        "Produce three things.\n\n"
        "1. A RATING from 1.0 to 5.0. Predict it in TWO explicit steps:\n"
        " Step A — The PRIOR: what does this user usually give? Look at their\n"
        " rating distribution and average. This is your starting point.\n"
        " Step B — The ITEM EVIDENCE: now read the NEW ITEM carefully. The\n"
        " title, description, and any crowd average carry signal about\n"
        " whether THIS specific item is a hit or a miss FOR THIS USER.\n"
        " - A title or description with negative/lukewarm language\n"
        " (e.g. 'capable of better', 'lost than found', 'disappointing')\n"
        " pulls the rating DOWN — even for a generous user.\n"
        " - Rich, substantive material that fits the user's stated tastes\n"
        " pulls the rating UP — even for a critical user. A critical\n"
        " reviewer still gives 4-5★ to things that genuinely engage them.\n"
        " - Do not assume 'critical tone' means the user dislikes things;\n"
        " critical users rate highly when the material rewards their\n"
        " attention. Do not assume a generous user gives 5★ to\n"
        " everything; they still give 4★ to mild disappointments.\n"
        " Final rating = the PRIOR adjusted by the ITEM EVIDENCE. If the\n"
        " item evidence is neutral or absent, stay near the prior. If the\n"
        " item evidence clearly points somewhere, MOVE toward it.\n\n"
        "2. A REVIEW in this user's voice — match their length, tone,\n"
        " vocabulary, and quirks visible in their writing samples\n"
        " (capitalization, sentence structure, how they signal approval or\n"
        " disapproval). The review's sentiment MUST be consistent with the\n"
        " rating you chose.\n\n"
        "3. A one-sentence REASONING explaining the rating. It MUST cite BOTH\n"
        " (a) the persona prior AND (b) the specific item evidence that\n"
        " adjusted it — e.g. 'This user averages 4.8★, but the title signals\n"
        " \"capable of better\", a mild letdown, so 4★ not 5★.'"
    )
    return "\n".join(parts)
def postprocess(output: GeneratedReview, persona: UserPersona,
                naija_mode: bool, llm: LLMClient) -> GeneratedReview:
    """Normalise the rating and optionally restyle the review text.

    The rating is bounded to the range 1.0-5.0 and then snapped to a
    half-star step. The review is stripped of surrounding whitespace; when
    ``naija_mode`` is on and the stripped review is non-empty it is passed
    through ``naija_style_review``. If that call raises, a warning is logged
    and the un-styled review is kept. ``reasoning`` is carried over as-is.

    ``persona`` is accepted but not used by this function.
    """
    # Bound first, then quantise to 0.5 increments.
    bounded = min(5.0, output.rating)
    bounded = max(1.0, bounded)
    half_star_rating = round(bounded * 2) / 2

    text = output.review.strip()

    if text and naija_mode:
        try:
            text = naija_style_review(text, llm=llm)
        except Exception as e:
            # Best-effort: a style-layer failure must not lose the review.
            log.warning(f"Naija style transfer failed; returning original. ({e})")

    return GeneratedReview(
        rating=half_star_rating,
        review=text,
        reasoning=output.reasoning,
    )
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Agent | |
| # ────────────────────────────────────────────────────────────────────────────── | |
class ImpersonationAgent:
    """Agent for Task A: write a review and rating as a given user would.

    Usage:
        agent = ImpersonationAgent()
        result = agent.run(persona, item, naija_mode=False)
        # result.rating, result.review, result.reasoning
    """

    def __init__(self, llm: LLMClient | None = None,
                 history_samples_k: int = 3,
                 use_reflection: bool = True,
                 reflection_max_iterations: int = 2):
        """Store the configuration; build a default ``LLMClient`` if none is given."""
        self.llm = llm if llm else LLMClient()
        self.history_samples_k = history_samples_k
        self.use_reflection = use_reflection
        self.reflection_max_iterations = reflection_max_iterations

    def run(self, persona: UserPersona, item: ItemInput,
            naija_mode: bool = False) -> ImpersonationResult:
        """Run the full pipeline for one (persona, item) pair.

        Stages, in order: pick grounding history, build the prompt, call the
        LLM for a structured draft, optionally run self-reflection on the
        draft's rating and review, then postprocess. The reasoning text
        always comes from the initial draft.
        """
        # Stage 1: grounding samples from the persona's own history.
        grounding = select_similar_history(persona, item, k=self.history_samples_k)
        log.info(f"Selected {len(grounding)} similar history items for grounding")

        # Stage 2: prompt assembly.
        prompt = build_prompt(persona, item, grounding)

        # Stage 3: structured LLM call.
        log.info(f"Calling LLM for impersonation of user {persona.user_id} on item {item.parent_asin}")
        draft = self.llm.structured(
            prompt,
            schema=GeneratedReview,
            model="reasoning",
            system="You are an expert behavioral simulator. You write reviews exactly as the specified user would write them, matching their tone, length, rating patterns, and quirks.",
        )

        # Stage 4: optional critique + refine loop (Stage 3b). The defaults
        # below are what gets reported when reflection is disabled.
        iterations_run = 0
        was_refined = False
        critique_notes: list[str] = []
        rating = draft.rating
        review = draft.review
        if self.use_reflection:
            log.info("Running self-reflection on generated review")
            rating, review, trace = reflect_on_review(
                self.llm, persona,
                item_title=item.title, item_domain=item.domain,
                rating=rating, review=review,
                max_iterations=self.reflection_max_iterations,
            )
            iterations_run = trace.iterations_run
            was_refined = trace.refined
            critique_notes = list(trace.critiques)

        candidate = GeneratedReview(
            rating=rating,
            review=review,
            reasoning=draft.reasoning,
        )

        # Stage 5: clamp the rating and apply the optional Naija style layer.
        finished = postprocess(candidate, persona, naija_mode=naija_mode, llm=self.llm)

        return ImpersonationResult(
            rating=finished.rating,
            review=finished.review,
            reasoning=finished.reasoning,
            used_history_count=len(grounding),
            naija_mode=naija_mode,
            reflection_iterations=iterations_run,
            reflection_refined=was_refined,
            reflection_notes=critique_notes,
        )