Spaces:

Israelbliz
/

User-Modeling-Agent

Running

+"""Task A agent — the Impersonator.
+Given a UserPersona and an item (title, description, categories, domain),
+produce a predicted rating and a generated review that match the user's
+behavioral voice.
+The workflow is a deterministic 5-step pipeline:
+    1. select_similar_history(persona, item)
+         → pick up to 3 past reviews from the persona's history (most recent first)
+         → "similar" means same domain when possible, else any
+         → these ground the generation in the user's actual writing samples
+    2. build_prompt(persona, item, similar_history)
+         → render the persona + similar reviews + item into a structured prompt
+         → the prompt is what the LLM sees
+    3. llm.structured(prompt, GeneratedReview)
+         → call GPT-4o (reasoning tier) and parse into a Pydantic schema
+         → schema enforces (rating: float, review: str, reasoning: str)
+    4. reflect_on_review(llm, persona, ...)
+         → critique the draft and, if needed, refine the rating and review
+         → on by default (up to 2 iterations); skipped when use_reflection is off
+    5. postprocess(output, persona)
+         → clamp rating to 1-5 and snap it to the nearest half-star
+         → if naija_mode is on, run the review through the style layer
+The reasoning field is mandatory and exposed in the API response. This is
+how the system demonstrates "intelligence per feature" — every generated
+review comes with a sentence explaining why this rating, grounded in the
+persona's signals.
+"""
+from __future__ import annotations
+import logging
+from dataclasses import dataclass, field
+from typing import Optional
+from pydantic import BaseModel, Field
+from core.llm import LLMClient
+from core.persona import UserPersona
+from core.nigerian import naija_style_review
+from core.reflection import reflect_on_review, ReflectionTrace
+log = logging.getLogger(__name__)
+# ──────────────────────────────────────────────────────────────────────────────
+# Schemas
+# ──────────────────────────────────────────────────────────────────────────────
+# Describes the target item of a generation request. The class docstring and
+# the Field descriptions are runtime strings (Pydantic surfaces them in the
+# generated JSON schema), so they are deliberately left untouched.
+class ItemInput(BaseModel):
+    """Item details given to the Impersonator."""
+    # Item identifier; in the visible code it is only interpolated into a log line.
+    parent_asin: str = Field(description="Item ID")
+    # Always rendered into the prompt and forwarded to the reflection step.
+    title: str = Field(description="Item title")
+    # Optional; build_prompt skips it when empty and truncates it to 800 characters.
+    description: str = Field(default="", description="Item description / synopsis")
+    # Optional; build_prompt skips it when empty.
+    categories: str = Field(default="", description="Category breadcrumbs")
+    # Compared against each history sample's "domain" to prefer same-domain samples.
+    domain: str = Field(description="Books / Movies_and_TV / Kindle_Store")
+    # Optional; build_prompt renders it (one decimal place) only when it is truthy.
+    average_rating: Optional[float] = Field(default=None, description="Crowd average rating, if known")
+# Schema handed to `llm.structured(..., schema=GeneratedReview)`; the agent also
+# reuses it to carry the (possibly reflected) draft into and out of `postprocess`.
+# The docstring and Field descriptions are part of the schema, so they stay as-is.
+class GeneratedReview(BaseModel):
+    """Structured output from the LLM."""
+    # No range constraint is declared here; `postprocess` clamps the value to
+    # [1.0, 5.0] and snaps it to the nearest half-star.
+    rating: float = Field(description="Star rating, 1.0 to 5.0, half-stars allowed")
+    # Stripped of surrounding whitespace (and optionally restyled) in `postprocess`.
+    review: str = Field(description="The full review text in this user's voice")
+    # Passed through `postprocess` unchanged.
+    reasoning: str = Field(description="One-sentence justification grounded in the user's persona signals")
+@dataclass
+class ImpersonationResult:
+    """Final output returned by `ImpersonationAgent.run`.
+    `rating`, `review` and `reasoning` hold the post-processed generation; the
+    remaining fields record how that generation was produced.
+    """
+    # Post-processed star rating (clamped to 1-5, snapped to half-stars).
+    rating: float
+    # Review text after optional reflection and optional Naija restyling.
+    review: str
+    # Justification sentence as produced by the first LLM call.
+    reasoning: str
+    # Number of past reviews that were rendered into the prompt as writing samples.
+    used_history_count: int   # how many past reviews informed the generation
+    # Echo of the `naija_mode` flag the caller passed to `run`.
+    naija_mode: bool
+    # Self-reflection metadata (Stage 3b); the defaults describe "reflection did not run".
+    reflection_iterations: int = 0   # how many critique cycles ran
+    reflection_refined: bool = False  # whether the review was revised
+    # default_factory gives every instance its own list instead of a shared one.
+    reflection_notes: list[str] = field(default_factory=list)  # critique findings
+# ──────────────────────────────────────────────────────────────────────────────
+# Workflow steps
+# ──────────────────────────────────────────────────────────────────────────────
+def select_similar_history(persona: UserPersona, item: ItemInput,
+                           k: int = 3) -> list[dict]:
+    """Pick up to k past reviews to ground the generation.
+    Preference order:
+      1. same domain as the item
+      2. any domain (fallback)
+    Within each group we just take the most recent (history_samples is
+    already sorted by recency-desc from the persona builder).
+    """
+    if not persona.history_samples:
+        return []
+    same_domain = [s for s in persona.history_samples if s["domain"] == item.domain]
+    other_domain = [s for s in persona.history_samples if s["domain"] != item.domain]
+    chosen = same_domain[:k]
+    if len(chosen) < k:
+        chosen.extend(other_domain[:(k - len(chosen))])
+    return chosen
+def build_prompt(persona: UserPersona, item: ItemInput,
+                 similar_history: list[dict]) -> str:
+    """Render the impersonation prompt.
+    Three sections:
+      - PERSONA: who the user is, quantitative + qualitative
+      - WRITING SAMPLES: actual reviews this user wrote
+      - TARGET ITEM: the new thing they need to review
+    The prompt is deliberately structured so the LLM has a clear template
+    to follow and grounds outputs in real data.
+    """
+    parts = ["You are simulating a real Amazon reviewer. Generate a review that authentically reflects their voice, rating tendencies, and behavioral patterns.\n"]
+    parts.append("=" * 60)
+    parts.append("THE USER YOU ARE SIMULATING")
+    parts.append("=" * 60)
+    parts.append(persona.to_prompt_block())
+    if similar_history:
+        parts.append("=" * 60)
+        parts.append(f"ACTUAL REVIEWS THIS USER WROTE (study the voice carefully)")
+        parts.append("=" * 60)
+        for i, h in enumerate(similar_history, 1):
+            parts.append(f"\n[Sample {i}] {h['rating']}★ in {h['domain']}:")
+            parts.append(h["text"][:600])
+    parts.append("\n" + "=" * 60)
+    parts.append("NEW ITEM TO REVIEW")
+    parts.append("=" * 60)
+    parts.append(f"Domain: {item.domain}")
+    parts.append(f"Title: {item.title}")
+    if item.categories:
+        parts.append(f"Categories: {item.categories}")
+    if item.description:
+        parts.append(f"Description: {item.description[:800]}")
+    if item.average_rating:
+        parts.append(f"Crowd average: {item.average_rating:.1f}★")
+    parts.append("\n" + "=" * 60)
+    parts.append("YOUR TASK")
+    parts.append("=" * 60)
+    parts.append(
+        "Produce three things.\n\n"
+        "1. A RATING from 1.0 to 5.0. Predict it in TWO explicit steps:\n"
+        "   Step A — The PRIOR: what does this user usually give? Look at their\n"
+        "     rating distribution and average. This is your starting point.\n"
+        "   Step B — The ITEM EVIDENCE: now read the NEW ITEM carefully. The\n"
+        "     title, description, and any crowd average carry signal about\n"
+        "     whether THIS specific item is a hit or a miss FOR THIS USER.\n"
+        "     - A title or description with negative/lukewarm language\n"
+        "       (e.g. 'capable of better', 'lost than found', 'disappointing')\n"
+        "       pulls the rating DOWN — even for a generous user.\n"
+        "     - Rich, substantive material that fits the user's stated tastes\n"
+        "       pulls the rating UP — even for a critical user. A critical\n"
+        "       reviewer still gives 4-5★ to things that genuinely engage them.\n"
+        "     - Do not assume 'critical tone' means the user dislikes things;\n"
+        "       critical users rate highly when the material rewards their\n"
+        "       attention. Do not assume a generous user gives 5★ to\n"
+        "       everything; they still give 4★ to mild disappointments.\n"
+        "   Final rating = the PRIOR adjusted by the ITEM EVIDENCE. If the\n"
+        "   item evidence is neutral or absent, stay near the prior. If the\n"
+        "   item evidence clearly points somewhere, MOVE toward it.\n\n"
+        "2. A REVIEW in this user's voice — match their length, tone,\n"
+        "   vocabulary, and quirks visible in their writing samples\n"
+        "   (capitalization, sentence structure, how they signal approval or\n"
+        "   disapproval). The review's sentiment MUST be consistent with the\n"
+        "   rating you chose.\n\n"
+        "3. A one-sentence REASONING explaining the rating. It MUST cite BOTH\n"
+        "   (a) the persona prior AND (b) the specific item evidence that\n"
+        "   adjusted it — e.g. 'This user averages 4.8★, but the title signals\n"
+        "   \"capable of better\", a mild letdown, so 4★ not 5★.'"
+    )
+    return "\n".join(parts)
+def postprocess(output: GeneratedReview, persona: UserPersona,
+                naija_mode: bool, llm: LLMClient) -> GeneratedReview:
+    """Clamp rating, optionally apply Naija style transfer."""
+    # Clamp to [1.0, 5.0] and snap to nearest half-star
+    rating = max(1.0, min(5.0, output.rating))
+    rating = round(rating * 2) / 2
+    review = output.review.strip()
+    if naija_mode and review:
+        try:
+            review = naija_style_review(review, llm=llm)
+        except Exception as e:
+            log.warning(f"Naija style transfer failed; returning original. ({e})")
+    return GeneratedReview(rating=rating, review=review, reasoning=output.reasoning)
+# ──────────────────────────────────────────────────────────────────────────────
+# Agent
+# ──────────────────────────────────────────────────────────────────────────────
+class ImpersonationAgent:
+    """The Task A agent.
+    Usage:
+        agent = ImpersonationAgent()
+        result = agent.run(persona, item, naija_mode=False)
+        # result.rating, result.review, result.reasoning
+    """
+    def __init__(self, llm: LLMClient | None = None,
+                 history_samples_k: int = 3,
+                 use_reflection: bool = True,
+                 reflection_max_iterations: int = 2):
+        self.llm = llm or LLMClient()
+        self.history_samples_k = history_samples_k
+        self.use_reflection = use_reflection
+        self.reflection_max_iterations = reflection_max_iterations
+    def run(self, persona: UserPersona, item: ItemInput,
+            naija_mode: bool = False) -> ImpersonationResult:
+        # Step 1: select grounding history
+        similar = select_similar_history(persona, item, k=self.history_samples_k)
+        log.info(f"Selected {len(similar)} similar history items for grounding")
+        # Step 2: build prompt
+        prompt = build_prompt(persona, item, similar)
+        # Step 3: LLM call with structured output
+        log.info(f"Calling LLM for impersonation of user {persona.user_id} on item {item.parent_asin}")
+        raw_output = self.llm.structured(
+            prompt,
+            schema=GeneratedReview,
+            model="reasoning",
+            system="You are an expert behavioral simulator. You write reviews exactly as the specified user would write them, matching their tone, length, rating patterns, and quirks.",
+        )
+        # Step 4: self-reflection — critique + refine (Stage 3b)
+        reflection_iterations = 0
+        reflection_refined = False
+        reflection_notes: list[str] = []
+        rating, review = raw_output.rating, raw_output.review
+        if self.use_reflection:
+            log.info("Running self-reflection on generated review")
+            rating, review, trace = reflect_on_review(
+                self.llm, persona,
+                item_title=item.title, item_domain=item.domain,
+                rating=rating, review=review,
+                max_iterations=self.reflection_max_iterations,
+            )
+            reflection_iterations = trace.iterations_run
+            reflection_refined = trace.refined
+            reflection_notes = list(trace.critiques)
+        refined_output = GeneratedReview(
+            rating=rating, review=review, reasoning=raw_output.reasoning,
+        )
+        # Step 5: postprocess (clamp rating, optional naija style)
+        final = postprocess(refined_output, persona, naija_mode=naija_mode, llm=self.llm)
+        return ImpersonationResult(
+            rating=final.rating,
+            review=final.review,
+            reasoning=final.reasoning,
+            used_history_count=len(similar),
+            naija_mode=naija_mode,
+            reflection_iterations=reflection_iterations,
+            reflection_refined=reflection_refined,
+            reflection_notes=reflection_notes,
+        )

task_a_user_modeling/main.py ADDED Viewed

	@@ -0,0 +1,207 @@

+"""Task A service — FastAPI wrapper around the Impersonation agent.
+Exposes:
+    POST /generate
+        Body: { user_id: str | persona: {...}, item: {...}, naija_mode: bool }
+        Returns: { rating, review, reasoning, used_history_count, naija_mode, user_id }
+    GET /health
+        Returns: { status: "ok" }
+    GET /users (helper)
+        Returns: list of sample user_ids the judges can try
+Run locally:
+    uvicorn task_a_user_modeling.main:app --host 0.0.0.0 --port 8001 --reload
+"""
+from __future__ import annotations
+import logging
+from functools import lru_cache
+from typing import Optional
+import pandas as pd
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from core.config import settings
+from core.llm import LLMClient
+from core.persona import PersonaEngine, UserPersona
+from task_a_user_modeling.agent import (
+    ImpersonationAgent, ItemInput, ImpersonationResult,
+)
+# Import-time side effect: configures root logging (INFO level, timestamped
+# format) as soon as this module is imported.
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+log = logging.getLogger(__name__)
+# The ASGI application that the endpoint decorators below register on; the
+# run command in the module docstring points uvicorn at `task_a_user_modeling.main:app`.
+# title/description/version are runtime strings passed to FastAPI.
+app = FastAPI(
+    title="NaijaTaste AI — Task A: Behavioral Review Simulator",
+    description=(
+        "Generates ratings and reviews in the voice of a specific user, given "
+        "their review history and a target item. Optional Nigerian English mode."
+    ),
+    version="0.1.0",
+)
+# ──────────────────────────────────────────────────────────────────────────────
+# Lazy-loaded resources
+# ──────────────────────────────────────────────────────────────────────────────
+@lru_cache(maxsize=1)
+def _load_reviews() -> pd.DataFrame:
+    path = settings.processed_dir / "reviews.parquet"
+    if not path.exists():
+        raise RuntimeError(
+            f"Reviews file not found at {path}. Run `python data/prepare_data.py` first."
+        )
+    df = pd.read_parquet(path)
+    log.info(f"Loaded {len(df):,} reviews from {path}")
+    return df
+@lru_cache(maxsize=1)
+def _persona_engine() -> PersonaEngine:
+    return PersonaEngine()
+@lru_cache(maxsize=1)
+def _agent() -> ImpersonationAgent:
+    return ImpersonationAgent()
+@lru_cache(maxsize=512)
+def _build_persona(user_id: str, enrich: bool = True) -> UserPersona:
+    """Build (and LLM-enrich) a persona for a user. Cached so repeated calls
+    for the same user are free."""
+    reviews = _load_reviews()
+    train = reviews[reviews["split"] == "train"]
+    engine = _persona_engine()
+    persona = engine.from_dataframe(user_id, train)
+    if enrich and persona.history_samples:
+        persona = engine.enrich(persona)
+    return persona
+# ──────────────────────────────────────────────────────────────────────────────
+# Request / response schemas
+# ──────────────────────────────────────────────────────────────────────────────
+# Cold-start alternative to `user_id`: the /generate handler turns these fields
+# into a synthetic UserPersona that has no history samples. The docstring and
+# Field descriptions are schema text and are left untouched.
+class PersonaInput(BaseModel):
+    """Free-form persona supplied directly (instead of by user_id)."""
+    # Required; becomes the synthetic persona's `voice_one_liner`.
+    description: str = Field(
+        description="Free-text description of the user (cold-start friendly)"
+    )
+    # Validated to 1.0-5.0; its rounded value also seeds a one-bucket rating distribution.
+    avg_rating: float = Field(default=4.0, ge=1.0, le=5.0)
+    # Validated to 10-2000; the unit (words vs. characters) is not stated here — TODO confirm.
+    avg_review_length: int = Field(default=80, ge=10, le=2000)
+    # Both lists are copied onto the synthetic persona as given.
+    preferred_themes: list[str] = Field(default_factory=list)
+    common_complaints: list[str] = Field(default_factory=list)
+# Body of POST /generate. Exactly one of `user_id` / `persona` must be given;
+# that rule is enforced in the handler (HTTP 400), not by this schema.
+class GenerateRequest(BaseModel):
+    # Path 1: look the user up in the train-split reviews.
+    user_id: Optional[str] = Field(
+        default=None,
+        description="If set, the persona is built from this user's review history",
+    )
+    # Path 2: describe the user inline (no history required).
+    persona: Optional[PersonaInput] = Field(
+        default=None,
+        description="Alternative to user_id — supply a free-text persona for cold-start",
+    )
+    # The item to rate and review; required on both paths.
+    item: ItemInput
+    # Forwarded to the agent's `run(..., naija_mode=...)`.
+    naija_mode: bool = Field(
+        default=False,
+        description="If true, rewrite the generated review in Nigerian English register",
+    )
+# Response of POST /generate. It mirrors the core fields of ImpersonationResult;
+# the reflection_* metadata on that result is not exposed here.
+class GenerateResponse(BaseModel):
+    rating: float
+    review: str
+    reasoning: str
+    # Number of history samples that were placed in the prompt.
+    used_history_count: int
+    # Echo of the request flag.
+    naija_mode: bool
+    # The requested user_id, or None when a free-text persona was supplied.
+    user_id: Optional[str] = None
+# ──────────────────────────────────────────────────────────────────────────────
+# Endpoints
+# ──────────────────────────────────────────────────────────────────────────────
+@app.get("/health")
+def health():
+    return {"status": "ok", "service": "task_a_user_modeling"}
+@app.get("/users")
+def list_users(limit: int = 20):
+    """Return a sample of user_ids that have rich histories, for judges to try."""
+    reviews = _load_reviews()
+    train = reviews[reviews["split"] == "train"]
+    counts = (train.groupby("user_id")
+                   .agg(n_reviews=("rating", "size"),
+                        n_domains=("domain", "nunique"),
+                        avg_rating=("rating", "mean"))
+                   .reset_index())
+    # Prefer cross-domain users
+    pick = counts[counts["n_domains"] >= 2].nlargest(limit, "n_reviews")
+    return {
+        "users": [
+            {
+                "user_id": row["user_id"],
+                "n_reviews": int(row["n_reviews"]),
+                "n_domains": int(row["n_domains"]),
+                "avg_rating": round(float(row["avg_rating"]), 2),
+            }
+            for _, row in pick.iterrows()
+        ]
+    }
+@app.post("/generate", response_model=GenerateResponse)
+def generate(req: GenerateRequest):
+    """Generate a rating + review impersonating the given user."""
+    if req.user_id and req.persona:
+        raise HTTPException(400, "Provide either user_id OR persona, not both")
+    if not req.user_id and not req.persona:
+        raise HTTPException(400, "Provide either user_id or persona")
+    if req.user_id:
+        try:
+            persona = _build_persona(req.user_id, enrich=True)
+        except ValueError:
+            raise HTTPException(404, f"user_id {req.user_id!r} not found in training data")
+        used_user_id = req.user_id
+    else:
+        # Build a synthetic persona from the free-text input
+        p = req.persona
+        persona = UserPersona(
+            user_id="custom_cold_start",
+            n_reviews=0,
+            avg_rating=p.avg_rating,
+            std_rating=0.5,
+            avg_review_length=float(p.avg_review_length),
+            std_review_length=20.0,
+            verified_rate=1.0,
+            domains=[req.item.domain],
+            n_domains=1,
+            rating_distribution={int(round(p.avg_rating)): 1.0},
+            top_terms=[],
+            tone="",
+            preferred_themes=p.preferred_themes,
+            common_complaints=p.common_complaints,
+            voice_one_liner=p.description,
+            history_samples=[],
+        )
+        used_user_id = None
+    agent = _agent()
+    result: ImpersonationResult = agent.run(persona, req.item, naija_mode=req.naija_mode)
+    return GenerateResponse(
+        rating=result.rating,
+        review=result.review,
+        reasoning=result.reasoning,
+        used_history_count=result.used_history_count,
+        naija_mode=result.naija_mode,
+        user_id=used_user_id,
+    )